From fead94c1c287c05e0cf47149036bbe86d335be66 Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@cs.washington.edu>
Date: Tue, 20 Feb 2024 16:02:03 -0800
Subject: [PATCH 01/26] reformat and track grad norm

---
 allenact/algorithms/onpolicy_sync/engine.py   | 172 +++++++++++-------
 .../onpolicy_sync/losses/a2cacktr.py          |  10 +-
 .../losses/grouped_action_imitation.py        |   4 +-
 .../onpolicy_sync/losses/imitation.py         |  12 +-
 .../algorithms/onpolicy_sync/losses/ppo.py    |  28 +--
 allenact/algorithms/onpolicy_sync/runner.py   |  79 ++++----
 allenact/algorithms/onpolicy_sync/storage.py  |  27 ++-
 .../onpolicy_sync/vector_sampled_tasks.py     |  24 ++-
 allenact/base_abstractions/distributions.py   |   3 +-
 allenact/base_abstractions/misc.py            |  22 ++-
 allenact/base_abstractions/sensor.py          |  18 +-
 allenact/embodiedai/aux_losses/losses.py      |  45 +++--
 allenact/embodiedai/mapping/mapping_losses.py |   8 +-
 .../mapping_models/active_neural_slam.py      |  47 +++--
 .../mapping/mapping_utils/map_builders.py     |  40 ++--
 .../mapping_utils/point_cloud_utils.py        |   3 +-
 allenact/embodiedai/models/basic_models.py    |   9 +-
 allenact/embodiedai/models/fusion_models.py   |   3 +-
 allenact/embodiedai/models/resnet.py          |  52 +++++-
 .../embodiedai/models/visual_nav_models.py    |   4 +-
 allenact/embodiedai/storage/vdr_storage.py    |  10 +-
 allenact/main.py                              |  10 +-
 allenact/setup.py                             |   2 +-
 allenact/utils/experiment_utils.py            |  30 ++-
 allenact/utils/misc_utils.py                  |   8 +-
 allenact/utils/model_utils.py                 |  31 +++-
 allenact/utils/spaces_utils.py                |  13 +-
 allenact/utils/system.py                      |   5 +-
 allenact/utils/viz_utils.py                   |  23 ++-
 .../babyai_plugin/babyai_models.py            |  71 +++++---
 .../babyai_plugin/babyai_tasks.py             |   4 +-
 .../clip_plugin/clip_preprocessors.py         |   4 +-
 allenact_plugins/gym_plugin/gym_models.py     |   7 +-
 allenact_plugins/gym_plugin/gym_tasks.py      |   6 +-
 .../habitat_plugin/habitat_constants.py       |   5 +-
 .../habitat_plugin/habitat_environment.py     |   5 +-
 .../habitat_plugin/habitat_preprocessors.py   |   1 -
 .../habitat_plugin/habitat_utils.py           |   9 +-
 .../ithor_plugin/ithor_environment.py         |  22 ++-
 .../ithor_plugin/ithor_sensors.py             |  32 +++-
 .../ithor_plugin/ithor_task_samplers.py       |  12 +-
 allenact_plugins/ithor_plugin/ithor_tasks.py  |  12 +-
 allenact_plugins/ithor_plugin/ithor_util.py   |   4 +-
 .../lighthouse_environment.py                 |   4 +-
 .../lighthouse_plugin/lighthouse_sensors.py   |  11 +-
 .../lighthouse_plugin/lighthouse_tasks.py     |  10 +-
 .../lighthouse_plugin/lighthouse_util.py      |  11 +-
 .../arm_calculation_utils.py                  |   1 +
 .../manipulathor_constants.py                 |   1 +
 .../manipulathor_environment.py               |  16 +-
 .../manipulathor_sensors.py                   |  23 ++-
 .../manipulathor_task_samplers.py             |  11 +-
 .../manipulathor_plugin/manipulathor_tasks.py |  20 +-
 .../manipulathor_plugin/manipulathor_viz.py   |   8 +-
 .../minigrid_plugin/minigrid_offpolicy.py     |   5 +-
 .../minigrid_plugin/minigrid_sensors.py       |   4 +-
 .../minigrid_plugin/minigrid_tasks.py         |  22 ++-
 .../navigation_plugin/objectnav/models.py     |  33 +++-
 .../navigation_plugin/pointnav/models.py      |   1 +
 .../robothor_plugin/robothor_environment.py   |  10 +-
 .../robothor_plugin/robothor_models.py        |   6 +-
 .../robothor_plugin/robothor_sensors.py       |   9 +-
 .../robothor_plugin/robothor_task_samplers.py |  20 +-
 .../robothor_plugin/robothor_tasks.py         |  11 +-
 projects/babyai_baselines/experiments/base.py |   8 +-
 .../experiments/go_to_local/a2c.py            |   7 +-
 .../experiments/go_to_local/base.py           |  12 +-
 .../experiments/go_to_local/bc.py             |   7 +-
 .../go_to_local/bc_teacher_forcing.py         |   8 +-
 .../experiments/go_to_local/dagger.py         |   4 +-
 .../experiments/go_to_local/ppo.py            |   7 +-
 .../experiments/go_to_obj/a2c.py              |   7 +-
 .../experiments/go_to_obj/base.py             |  14 +-
 .../experiments/go_to_obj/bc.py               |   7 +-
 .../go_to_obj/bc_teacher_forcing.py           |   8 +-
 .../experiments/go_to_obj/dagger.py           |   4 +-
 .../experiments/go_to_obj/ppo.py              |   7 +-
 .../experiments/gym_mujoco_ddppo.py           |   3 +-
 .../experiments/armpointnav_thor_base.py      |   6 +-
 .../models/arm_pointnav_models.py             |   1 +
 .../models/disjoint_arm_pointnav_models.py    |   1 +
 .../models/manipulathor_net_utils.py          |  21 ++-
 ...lipresnet50gru_ddppo_increasingrollouts.py |   6 +-
 .../habitat/objectnav_habitat_base.py         |   4 +-
 ...objectnav_ithor_depth_resnet18gru_ddppo.py |   4 +-
 .../objectnav_ithor_rgb_resnet18gru_ddppo.py  |   4 +-
 .../objectnav_ithor_rgbd_resnet18gru_ddppo.py |   4 +-
 .../experiments/objectnav_thor_base.py        |  12 +-
 ...othor_rgb_unfrozenresnet18gru_vdr_ddppo.py |  11 +-
 ...jectnav_robothor_rgb_resnet18gru_dagger.py |   4 +-
 projects/objectnav_baselines/mixins.py        |  85 ++++++---
 .../experiments/pointnav_thor_base.py         |  11 +-
 ...tnav_robothor_depth_simpleconvgru_ddppo.py |   4 +-
 ...intnav_robothor_rgb_simpleconvgru_ddppo.py |   4 +-
 ...ntnav_robothor_rgbd_simpleconvgru_ddppo.py |   4 +-
 projects/pointnav_baselines/mixins.py         |   6 +-
 .../distributed_objectnav_tutorial.py         |  18 +-
 projects/tutorials/gym_mujoco_tutorial.py     |   3 +-
 projects/tutorials/gym_tutorial.py            |   9 +-
 projects/tutorials/minigrid_tutorial.py       |   1 +
 projects/tutorials/minigrid_tutorial_conds.py |  10 +-
 .../navtopartner_robothor_rgb_ppo.py          |  24 ++-
 ...ct_nav_ithor_dagger_then_ppo_one_object.py |   9 +-
 ...av_ithor_dagger_then_ppo_one_object_viz.py |   7 +-
 .../object_nav_ithor_ppo_one_object.py        |  14 +-
 .../tutorials/pointnav_habitat_rgb_ddppo.py   |   4 +-
 .../tutorials/pointnav_ithor_rgb_ddppo.py     |   6 +-
 .../tutorials/running_inference_tutorial.py   |   9 +-
 .../tutorials/training_a_pointnav_model.py    |   7 +-
 scripts/dcommand.py                           |   3 +-
 scripts/dconfig.py                            |   3 +-
 scripts/dkill.py                              |   3 +-
 scripts/dmain.py                              |   2 +-
 scripts/literate.py                           |   1 +
 .../test_minigrid_conditional.py              |   4 +-
 tests/mapping/test_ai2thor_mapping.py         |  75 +++++---
 tests/sync_algs_cpu/test_to_to_obj_trains.py  |   4 +-
 tests/utils/test_spaces.py                    |   5 +-
 118 files changed, 1139 insertions(+), 550 deletions(-)

diff --git a/allenact/algorithms/onpolicy_sync/engine.py b/allenact/algorithms/onpolicy_sync/engine.py
index 9526be7ca..84b2194b7 100644
--- a/allenact/algorithms/onpolicy_sync/engine.py
+++ b/allenact/algorithms/onpolicy_sync/engine.py
@@ -1,4 +1,5 @@
 """Defines the reinforcement learning `OnPolicyRLEngine`."""
+
 import datetime
 import logging
 import numbers
@@ -17,6 +18,7 @@
 import torch.multiprocessing as mp  # type: ignore
 import torch.nn as nn
 import torch.optim as optim
+
 # noinspection PyProtectedMember
 from torch._C._distributed_c10d import ReduceOp
 
@@ -196,16 +198,17 @@ def __init__(
 
         create_model_kwargs = {}
         if self.machine_params.sensor_preprocessor_graph is not None:
-            self.sensor_preprocessor_graph = self.machine_params.sensor_preprocessor_graph.to(
-                self.device
+            self.sensor_preprocessor_graph = (
+                self.machine_params.sensor_preprocessor_graph.to(self.device)
+            )
+            create_model_kwargs["sensor_preprocessor_graph"] = (
+                self.sensor_preprocessor_graph
             )
-            create_model_kwargs[
-                "sensor_preprocessor_graph"
-            ] = self.sensor_preprocessor_graph
 
         set_seed(self.seed)
         self.actor_critic = cast(
-            ActorCriticModel, self.config.create_model(**create_model_kwargs),
+            ActorCriticModel,
+            self.config.create_model(**create_model_kwargs),
         ).to(self.device)
 
         if initial_model_state_dict is not None:
@@ -262,9 +265,11 @@ def __init__(
                 world_size=self.num_workers,
                 # During testing, we sometimes found that default timeout was too short
                 # resulting in the run terminating surprisingly, we increase it here.
-                timeout=datetime.timedelta(minutes=3000)
-                if (self.mode == TEST_MODE_STR or DEBUGGING)
-                else dist.default_pg_timeout,
+                timeout=(
+                    datetime.timedelta(minutes=3000)
+                    if (self.mode == TEST_MODE_STR or DEBUGGING)
+                    else dist.default_pg_timeout
+                ),
             )
             self.is_distributed = True
 
@@ -284,9 +289,9 @@ def __init__(
         self.optimizer: Optional[optim.optimizer.Optimizer] = None
         # noinspection PyProtectedMember
         self.lr_scheduler: Optional[_LRScheduler] = None
-        self.insufficient_data_for_update: Optional[
-            torch.distributed.PrefixStore
-        ] = None
+        self.insufficient_data_for_update: Optional[torch.distributed.PrefixStore] = (
+            None
+        )
 
         # Training pipeline will be instantiated during training and inference.
         # During inference however, it will be instantiated anew on each run of `run_eval`
@@ -326,9 +331,9 @@ def vector_tasks(
                 make_sampler_fn=self.config.make_sampler_fn,
                 sampler_fn_args=self.get_sampler_fn_args(seeds),
                 callback_sensors=self.callback_sensors,
-                multiprocessing_start_method="forkserver"
-                if self.mp_ctx is None
-                else None,
+                multiprocessing_start_method=(
+                    "forkserver" if self.mp_ctx is None else None
+                ),
                 mp_ctx=self.mp_ctx,
                 max_processes=self.max_sampler_processes_per_worker,
                 read_timeout=DEBUG_VST_TIMEOUT if DEBUGGING else 1 * 60,
@@ -343,7 +348,7 @@ def worker_seeds(nprocesses: int, initial_seed: Optional[int]) -> List[int]:
         if initial_seed is not None:
             rstate = random.getstate()
             random.seed(initial_seed)
-        seeds = [random.randint(0, (2 ** 31) - 1) for _ in range(nprocesses)]
+        seeds = [random.randint(0, (2**31) - 1) for _ in range(nprocesses)]
         if initial_seed is not None:
             random.setstate(rstate)
         return seeds
@@ -400,7 +405,8 @@ def checkpoint_load(
             ckpt = torch.load(os.path.abspath(ckpt), map_location="cpu")
 
         ckpt = cast(
-            Dict[str, Union[Dict[str, Any], torch.Tensor, float, int, str, List]], ckpt,
+            Dict[str, Union[Dict[str, Any], torch.Tensor, float, int, str, List]],
+            ckpt,
         )
 
         self.actor_critic.load_state_dict(ckpt["model_state_dict"])  # type:ignore
@@ -414,7 +420,9 @@ def checkpoint_load(
 
     # aggregates task metrics currently in queue
     def aggregate_task_metrics(
-        self, logging_pkg: LoggingPackage, num_tasks: int = -1,
+        self,
+        logging_pkg: LoggingPackage,
+        num_tasks: int = -1,
     ) -> LoggingPackage:
         if num_tasks > 0:
             if len(self.single_process_metrics) != num_tasks:
@@ -652,7 +660,8 @@ def collect_step_across_all_task_samplers(
     ) -> int:
         rollout_storage = cast(RolloutStorage, uuid_to_storage[rollout_storage_uuid])
         actions, actor_critic_output, memory, _ = self.act(
-            rollout_storage=rollout_storage, dist_wrapper_class=dist_wrapper_class,
+            rollout_storage=rollout_storage,
+            dist_wrapper_class=dist_wrapper_class,
         )
 
         # Flatten actions
@@ -687,7 +696,9 @@ def collect_step_across_all_task_samplers(
         observations, rewards, dones, infos = [list(x) for x in zip(*outputs)]
 
         rewards = torch.tensor(
-            rewards, dtype=torch.float, device=self.device,  # type:ignore
+            rewards,
+            dtype=torch.float,
+            device=self.device,  # type:ignore
         )
 
         # We want rewards to have dimensions [sampler, reward]
@@ -701,7 +712,9 @@ def collect_step_across_all_task_samplers(
         masks = (
             1.0
             - torch.tensor(
-                dones, dtype=torch.float32, device=self.device,  # type:ignore
+                dones,
+                dtype=torch.float32,
+                device=self.device,  # type:ignore
             )
         ).view(
             -1, 1
@@ -726,9 +739,9 @@ def collect_step_across_all_task_samplers(
                     s.sampler_select(keep)
 
         to_add_to_storage = dict(
-            observations=self._preprocess_observations(batch)
-            if len(keep) > 0
-            else batch,
+            observations=(
+                self._preprocess_observations(batch) if len(keep) > 0 else batch
+            ),
             memory=self._active_memory(memory, keep),
             actions=flat_actions[0, keep],
             action_log_probs=actor_critic_output.distributions.log_prob(actions)[
@@ -802,7 +815,6 @@ def step_count(self) -> int:
             return 0
         return self.training_pipeline.current_stage.steps_taken_in_stage
 
-
     def compute_losses_track_them_and_backprop(
         self,
         stage: PipelineStage,
@@ -819,9 +831,9 @@ def compute_losses_track_them_and_backprop(
                 "insufficient_data_for_update", str(0)
             )
             dist.barrier(
-                device_ids=None
-                if self.device == torch.device("cpu")
-                else [self.device.index]
+                device_ids=(
+                    None if self.device == torch.device("cpu") else [self.device.index]
+                )
             )
 
         training_settings = stage_component.training_settings
@@ -909,9 +921,11 @@ def single_batch_generator(streaming_storage: StreamingStorageMixin):
                         1 * (not enough_data_for_update),
                     )
                     dist.barrier(
-                        device_ids=None
-                        if self.device == torch.device("cpu")
-                        else [self.device.index]
+                        device_ids=(
+                            None
+                            if self.device == torch.device("cpu")
+                            else [self.device.index]
+                        )
                     )
 
                     if (
@@ -1043,9 +1057,9 @@ def single_batch_generator(streaming_storage: StreamingStorageMixin):
                     to_track["lr"] = self.optimizer.param_groups[0]["lr"]
 
                 if training_settings.num_mini_batch is not None:
-                    to_track[
-                        "rollout_num_mini_batch"
-                    ] = training_settings.num_mini_batch
+                    to_track["rollout_num_mini_batch"] = (
+                        training_settings.num_mini_batch
+                    )
 
                 for k, v in to_track.items():
                     # We need to set the bsize to 1 for `worker_batch_size` below as we're trying to record the
@@ -1062,19 +1076,28 @@ def single_batch_generator(streaming_storage: StreamingStorageMixin):
                     )
 
                 if not skip_backprop:
-                    self.backprop_step(
+                    total_grad_norm = self.backprop_step(
                         total_loss=total_loss,
                         max_grad_norm=training_settings.max_grad_norm,
                         local_to_global_batch_size_ratio=bsize / aggregate_bsize,
                     )
+                    self.tracking_info_list.append(
+                        TrackingInfo(
+                            type=TrackingInfoType.UPDATE_INFO,
+                            info={"total_grad_norm": total_grad_norm.item()},
+                            n=bsize,
+                            storage_uuid=stage_component.storage_uuid,
+                            stage_component_uuid=stage_component.uuid,
+                        )
+                    )
 
-                stage.stage_component_uuid_to_stream_memory[
-                    stage_component.uuid
-                ] = detach_recursively(
-                    input=stage.stage_component_uuid_to_stream_memory[
-                        stage_component.uuid
-                    ],
-                    inplace=True,
+                stage.stage_component_uuid_to_stream_memory[stage_component.uuid] = (
+                    detach_recursively(
+                        input=stage.stage_component_uuid_to_stream_memory[
+                            stage_component.uuid
+                        ],
+                        inplace=True,
+                    )
                 )
 
     def close(self, verbose=True):
@@ -1216,8 +1239,10 @@ def __init__(
                 "offpolicy_epoch_done", self.store
             )
             # Flag for finished worker in current epoch with custom component
-            self.insufficient_data_for_update = torch.distributed.PrefixStore(  # type:ignore
-                "insufficient_data_for_update", self.store
+            self.insufficient_data_for_update = (
+                torch.distributed.PrefixStore(  # type:ignore
+                    "insufficient_data_for_update", self.store
+                )
             )
         else:
             self.num_workers_done = None
@@ -1243,7 +1268,7 @@ def advance_seed(
         if seed is None:
             return seed
         seed = (seed ^ (self.training_pipeline.total_steps + 1)) % (
-            2 ** 31 - 1
+            2**31 - 1
         )  # same seed for all workers
 
         if (not return_same_seed_per_worker) and (
@@ -1321,9 +1346,11 @@ def checkpoint_save(self, pipeline_stage_index: Optional[int] = None) -> str:
             self.checkpoints_dir,
             "exp_{}__stage_{:02d}__steps_{:012d}.pt".format(
                 self.experiment_name,
-                self.training_pipeline.current_stage_index
-                if pipeline_stage_index is None
-                else pipeline_stage_index,
+                (
+                    self.training_pipeline.current_stage_index
+                    if pipeline_stage_index is None
+                    else pipeline_stage_index
+                ),
                 self.training_pipeline.total_steps,
             ),
         )
@@ -1375,7 +1402,9 @@ def step_count(self, val: int) -> None:
 
     @property
     def log_interval(self):
-        return self.training_pipeline.current_stage.training_settings.metric_accumulate_interval
+        return (
+            self.training_pipeline.current_stage.training_settings.metric_accumulate_interval
+        )
 
     @property
     def approx_steps(self):
@@ -1416,7 +1445,8 @@ def tracking_callback(type: TrackingInfoType, info: Dict[str, Any], n: int):
             )
 
         actions, actor_critic_output, memory, step_observation = super().act(
-            rollout_storage=rollout_storage, dist_wrapper_class=dist_wrapper_class,
+            rollout_storage=rollout_storage,
+            dist_wrapper_class=dist_wrapper_class,
         )
 
         self.step_count += self.num_active_samplers
@@ -1474,17 +1504,27 @@ def backprop_step(
                     else:  # local_global_batch_size_tuple is not None, since we're distributed:
                         p.grad = p.grad * local_to_global_batch_size_ratio
                     reductions.append(
-                        dist.all_reduce(p.grad, async_op=True,)  # sum
+                        dist.all_reduce(
+                            p.grad,
+                            async_op=True,
+                        )  # sum
                     )  # synchronize
                     all_params.append(p)
             for reduction, p in zip(reductions, all_params):
                 reduction.wait()
 
+        if hasattr(self.actor_critic, "compute_total_grad_norm"):
+            total_grad_norm = self.actor_critic.compute_total_grad_norm()
+        else:
+            total_grad_norm = 0.0
+
         nn.utils.clip_grad_norm_(
-            self.actor_critic.parameters(), max_norm=max_grad_norm,  # type: ignore
+            self.actor_critic.parameters(),
+            max_norm=max_grad_norm,  # type: ignore
         )
 
         self.optimizer.step()  # type: ignore
+        return total_grad_norm
 
     def _save_checkpoint_then_send_checkpoint_for_validation_and_update_last_save_counter(
         self, pipeline_stage_index: Optional[int] = None
@@ -1561,10 +1601,11 @@ def run_pipeline(self, valid_on_initial_weights: bool = False):
                     )
                 ):
                     self._save_checkpoint_then_send_checkpoint_for_validation_and_update_last_save_counter(
-                        pipeline_stage_index=self.training_pipeline.current_stage_index
-                        - 1
-                        if not training_is_complete
-                        else len(self.training_pipeline.pipeline_stages) - 1
+                        pipeline_stage_index=(
+                            self.training_pipeline.current_stage_index - 1
+                            if not training_is_complete
+                            else len(self.training_pipeline.pipeline_stages) - 1
+                        )
                     )
 
                 # If training is complete, break out
@@ -1599,9 +1640,11 @@ def run_pipeline(self, valid_on_initial_weights: bool = False):
                 self.num_workers_steps.set("steps", str(0))
                 # Ensure all workers are done before incrementing num_workers_{steps, done}
                 dist.barrier(
-                    device_ids=None
-                    if self.device == torch.device("cpu")
-                    else [self.device.index]
+                    device_ids=(
+                        None
+                        if self.device == torch.device("cpu")
+                        else [self.device.index]
+                    )
                 )
 
             self.former_steps = self.step_count
@@ -1716,9 +1759,11 @@ def run_pipeline(self, valid_on_initial_weights: bool = False):
 
                     # Ensure all workers are done before updating step counter
                     dist.barrier(
-                        device_ids=None
-                        if self.device == torch.device("cpu")
-                        else [self.device.index]
+                        device_ids=(
+                            None
+                            if self.device == torch.device("cpu")
+                            else [self.device.index]
+                        )
                     )
 
                     ndone = int(self.num_workers_done.get("done"))
@@ -2097,7 +2142,8 @@ def run_eval(
                     lengths: List[int]
                     if self.num_active_samplers > 0:
                         lengths = self.vector_tasks.command(
-                            "sampler_attr", ["length"] * self.num_active_samplers,
+                            "sampler_attr",
+                            ["length"] * self.num_active_samplers,
                         )
                         npending = sum(lengths)
                     else:
diff --git a/allenact/algorithms/onpolicy_sync/losses/a2cacktr.py b/allenact/algorithms/onpolicy_sync/losses/a2cacktr.py
index 936cd1889..82d632949 100644
--- a/allenact/algorithms/onpolicy_sync/losses/a2cacktr.py
+++ b/allenact/algorithms/onpolicy_sync/losses/a2cacktr.py
@@ -1,4 +1,5 @@
 """Implementation of A2C and ACKTR losses."""
+
 from typing import cast, Tuple, Dict, Optional
 
 import torch
@@ -99,7 +100,9 @@ def loss(  # type: ignore
         **kwargs,
     ):
         losses_per_step = self.loss_per_step(
-            step_count=step_count, batch=batch, actor_critic_output=actor_critic_output,
+            step_count=step_count,
+            batch=batch,
+            actor_critic_output=actor_critic_output,
         )
         losses = {
             key: (loss.mean(), weight)
@@ -169,4 +172,7 @@ def __init__(
         )
 
 
-A2CConfig = dict(value_loss_coef=0.5, entropy_coef=0.01,)
+A2CConfig = dict(
+    value_loss_coef=0.5,
+    entropy_coef=0.01,
+)
diff --git a/allenact/algorithms/onpolicy_sync/losses/grouped_action_imitation.py b/allenact/algorithms/onpolicy_sync/losses/grouped_action_imitation.py
index 17c93ccff..116f4abae 100644
--- a/allenact/algorithms/onpolicy_sync/losses/grouped_action_imitation.py
+++ b/allenact/algorithms/onpolicy_sync/losses/grouped_action_imitation.py
@@ -68,4 +68,6 @@ def loss(  # type: ignore
             torch.log((probs_tensor * expert_group_actions_mask).sum(-1))
         ).mean()
 
-        return total_loss, {"grouped_action_cross_entropy": total_loss.item(),}
+        return total_loss, {
+            "grouped_action_cross_entropy": total_loss.item(),
+        }
diff --git a/allenact/algorithms/onpolicy_sync/losses/imitation.py b/allenact/algorithms/onpolicy_sync/losses/imitation.py
index f9459a735..7683bfed8 100644
--- a/allenact/algorithms/onpolicy_sync/losses/imitation.py
+++ b/allenact/algorithms/onpolicy_sync/losses/imitation.py
@@ -149,7 +149,9 @@ def loss(  # type: ignore
                     ready_actions[group_name] = expert_action
 
                     current_loss, expert_successes = self.group_loss(
-                        cd, expert_action, expert_action_masks,
+                        cd,
+                        expert_action,
+                        expert_action_masks,
                     )
 
                     should_report_loss = (
@@ -204,7 +206,9 @@ def loss(  # type: ignore
             )
         return (
             total_loss,
-            {"expert_cross_entropy": total_loss.item(), **losses}
-            if should_report_loss
-            else {},
+            (
+                {"expert_cross_entropy": total_loss.item(), **losses}
+                if should_report_loss
+                else {}
+            ),
         )
diff --git a/allenact/algorithms/onpolicy_sync/losses/ppo.py b/allenact/algorithms/onpolicy_sync/losses/ppo.py
index 6f787644f..3d995c122 100644
--- a/allenact/algorithms/onpolicy_sync/losses/ppo.py
+++ b/allenact/algorithms/onpolicy_sync/losses/ppo.py
@@ -115,15 +115,17 @@ def add_trailing_dims(t: torch.Tensor):
                 "action": (action_loss, None),
                 "entropy": (dist_entropy.mul_(-1.0), self.entropy_coef),  # type: ignore
             },
-            {
-                "ratio": ratio,
-                "ratio_clamped": clamped_ratio,
-                "ratio_used": torch.where(
-                    cast(torch.Tensor, use_clamped), clamped_ratio, ratio
-                ),
-            }
-            if self.show_ratios
-            else {},
+            (
+                {
+                    "ratio": ratio,
+                    "ratio_clamped": clamped_ratio,
+                    "ratio_used": torch.where(
+                        cast(torch.Tensor, use_clamped), clamped_ratio, ratio
+                    ),
+                }
+                if self.show_ratios
+                else {}
+            ),
         )
 
     def loss(  # type: ignore
@@ -135,7 +137,9 @@ def loss(  # type: ignore
         **kwargs
     ):
         losses_per_step, ratio_info = self.loss_per_step(
-            step_count=step_count, batch=batch, actor_critic_output=actor_critic_output,
+            step_count=step_count,
+            batch=batch,
+            actor_critic_output=actor_critic_output,
         )
         losses = {
             key: (loss.mean(), weight)
@@ -210,7 +214,9 @@ def loss(  # type: ignore
 
         return (
             value_loss,
-            {"value": value_loss.item(),},
+            {
+                "value": value_loss.item(),
+            },
         )
 
 
diff --git a/allenact/algorithms/onpolicy_sync/runner.py b/allenact/algorithms/onpolicy_sync/runner.py
index 5302d1984..67f322d58 100644
--- a/allenact/algorithms/onpolicy_sync/runner.py
+++ b/allenact/algorithms/onpolicy_sync/runner.py
@@ -1,4 +1,5 @@
 """Defines the reinforcement learning `OnPolicyRunner`."""
+
 import copy
 import enum
 import glob
@@ -542,9 +543,9 @@ def start_train(
                 config=self.config,
                 callback_sensors=self._get_callback_sensors,
                 results_queue=self.queues["results"],
-                checkpoints_queue=self.queues["checkpoints"]
-                if self.running_validation
-                else None,
+                checkpoints_queue=(
+                    self.queues["checkpoints"] if self.running_validation else None
+                ),
                 checkpoints_dir=self.checkpoint_dir(),
                 seed=self.seed,
                 deterministic_cudnn=self.deterministic_cudnn,
@@ -555,9 +556,9 @@ def start_train(
                 distributed_port=distributed_port,
                 max_sampler_processes_per_worker=max_sampler_processes_per_worker,
                 save_ckpt_after_every_pipeline_stage=save_ckpt_after_every_pipeline_stage,
-                initial_model_state_dict=initial_model_state_dict
-                if model_hash is None
-                else model_hash,
+                initial_model_state_dict=(
+                    initial_model_state_dict if model_hash is None else model_hash
+                ),
                 first_local_worker_id=worker_ids[0],
                 distributed_preemption_threshold=self.distributed_preemption_threshold,
                 valid_on_initial_weights=valid_on_initial_weights,
@@ -782,9 +783,11 @@ def checkpoint_dir(
         self, start_time_str: Optional[str] = None, create_if_none: bool = True
     ):
         path_parts = [
-            self.config.tag()
-            if self.extra_tag == ""
-            else os.path.join(self.config.tag(), self.extra_tag),
+            (
+                self.config.tag()
+                if self.extra_tag == ""
+                else os.path.join(self.config.tag(), self.extra_tag)
+            ),
             start_time_str or self.local_start_time_str,
         ]
         if self.save_dir_fmt == SaveDirFormat.NESTED:
@@ -816,9 +819,11 @@ def log_writer_path(self, start_time_str: str) -> str:
                 )
             path = os.path.join(
                 self.output_dir,
-                self.config.tag()
-                if self.extra_tag == ""
-                else os.path.join(self.config.tag(), self.extra_tag),
+                (
+                    self.config.tag()
+                    if self.extra_tag == ""
+                    else os.path.join(self.config.tag(), self.extra_tag)
+                ),
                 start_time_str,
                 "train_tb",
             )
@@ -827,9 +832,11 @@ def log_writer_path(self, start_time_str: str) -> str:
             path = os.path.join(
                 self.output_dir,
                 "tb",
-                self.config.tag()
-                if self.extra_tag == ""
-                else os.path.join(self.config.tag(), self.extra_tag),
+                (
+                    self.config.tag()
+                    if self.extra_tag == ""
+                    else os.path.join(self.config.tag(), self.extra_tag)
+                ),
                 start_time_str,
             )
             if self.mode == TEST_MODE_STR:
@@ -850,9 +857,11 @@ def metric_path(self, start_time_str: str) -> str:
             return os.path.join(
                 self.output_dir,
                 "metrics",
-                self.config.tag()
-                if self.extra_tag == ""
-                else os.path.join(self.config.tag(), self.extra_tag),
+                (
+                    self.config.tag()
+                    if self.extra_tag == ""
+                    else os.path.join(self.config.tag(), self.extra_tag)
+                ),
                 start_time_str,
             )
         else:
@@ -860,9 +869,11 @@ def metric_path(self, start_time_str: str) -> str:
 
     def save_project_state(self):
         path_parts = [
-            self.config.tag()
-            if self.extra_tag == ""
-            else os.path.join(self.config.tag(), self.extra_tag),
+            (
+                self.config.tag()
+                if self.extra_tag == ""
+                else os.path.join(self.config.tag(), self.extra_tag)
+            ),
             self.local_start_time_str,
         ]
         if self.save_dir_fmt == SaveDirFormat.NESTED:
@@ -1091,12 +1102,12 @@ def update_keys_metric(
                             f" AllenAct, please report this issue at https://github.com/allenai/allenact/issues."
                         )
                     else:
-                        scalar_name_to_total_storage_experience[
-                            scalar_name
-                        ] = total_exp_for_storage
-                        scalar_name_to_total_experiences_key[
-                            scalar_name
-                        ] = storage_uuid_to_total_experiences_key[storage_uuid]
+                        scalar_name_to_total_storage_experience[scalar_name] = (
+                            total_exp_for_storage
+                        )
+                        scalar_name_to_total_experiences_key[scalar_name] = (
+                            storage_uuid_to_total_experiences_key[storage_uuid]
+                        )
 
         assert all_equal(
             checkpoint_file_name
@@ -1156,9 +1167,9 @@ def update_keys_metric(
                             stage_component_uuid,
                         )
                         callback_metric_means[approx_eps_key] = eps
-                        scalar_name_to_total_experiences_key[
-                            approx_eps_key
-                        ] = storage_uuid_to_total_experiences_key[storage_uuid]
+                        scalar_name_to_total_experiences_key[approx_eps_key] = (
+                            storage_uuid_to_total_experiences_key[storage_uuid]
+                        )
 
                         if log_writer is not None:
                             log_writer.add_scalar(
@@ -1358,9 +1369,11 @@ def log_and_close(
                                 self.process_valid_package(
                                     log_writer=log_writer,
                                     pkg=package,
-                                    all_results=eval_results
-                                    if self._collect_valid_results
-                                    else None,
+                                    all_results=(
+                                        eval_results
+                                        if self._collect_valid_results
+                                        else None
+                                    ),
                                 )
 
                                 if metrics_file is not None:
diff --git a/allenact/algorithms/onpolicy_sync/storage.py b/allenact/algorithms/onpolicy_sync/storage.py
index bb023c459..60bb36e19 100644
--- a/allenact/algorithms/onpolicy_sync/storage.py
+++ b/allenact/algorithms/onpolicy_sync/storage.py
@@ -121,7 +121,8 @@ def empty(self) -> bool:
 class MiniBatchStorageMixin(abc.ABC):
     @abc.abstractmethod
     def batched_experience_generator(
-        self, num_mini_batch: int,
+        self,
+        num_mini_batch: int,
     ) -> Generator[Dict[str, Any], None, None]:
         raise NotImplementedError
 
@@ -183,7 +184,8 @@ def initialize(
             self.action_space = action_space
 
             self.memory_first_last: Memory = self.create_memory(
-                spec=self.memory_specification, num_samplers=num_samplers,
+                spec=self.memory_specification,
+                num_samplers=num_samplers,
             ).to(self.device)
             for key in self.memory_specification:
                 self.flattened_to_unflattened["memory"][key] = [key]
@@ -249,7 +251,10 @@ def observations(self) -> Memory:
         return self._observations_full.slice(dim=0, start=0, stop=self.step + 1)
 
     @staticmethod
-    def create_memory(spec: Optional[FullMemorySpecType], num_samplers: int,) -> Memory:
+    def create_memory(
+        spec: Optional[FullMemorySpecType],
+        num_samplers: int,
+    ) -> Memory:
         if spec is None:
             return Memory()
 
@@ -290,7 +295,9 @@ def to(self, device: torch.device):
         self.device = device
 
     def insert_observations(
-        self, observations: ObservationType, time_step: int,
+        self,
+        observations: ObservationType,
+        time_step: int,
     ):
         self.insert_tensors(
             storage=self._observations_full,
@@ -300,7 +307,9 @@ def insert_observations(
         )
 
     def insert_memory(
-        self, memory: Optional[Memory], time_step: int,
+        self,
+        memory: Optional[Memory],
+        time_step: int,
     ):
         if memory is None:
             assert len(self.memory_first_last) == 0
@@ -519,7 +528,10 @@ def before_updates(
     ):
         assert len(kwargs) == 0
         self.compute_returns(
-            next_value=next_value, use_gae=use_gae, gamma=gamma, tau=tau,
+            next_value=next_value,
+            use_gae=use_gae,
+            gamma=gamma,
+            tau=tau,
         )
 
         self._advantages = self.returns[:-1] - self.value_preds[:-1]
@@ -587,7 +599,8 @@ def compute_returns(
                 )
 
     def batched_experience_generator(
-        self, num_mini_batch: int,
+        self,
+        num_mini_batch: int,
     ):
         assert self._before_update_called, (
             "self._before_update_called() must be called before"
diff --git a/allenact/algorithms/onpolicy_sync/vector_sampled_tasks.py b/allenact/algorithms/onpolicy_sync/vector_sampled_tasks.py
index 122409a3c..0b17e28f8 100644
--- a/allenact/algorithms/onpolicy_sync/vector_sampled_tasks.py
+++ b/allenact/algorithms/onpolicy_sync/vector_sampled_tasks.py
@@ -376,7 +376,9 @@ def _task_sampling_loop_worker(
                     else:
                         connection_write_fn(
                             sp_vector_sampled_tasks.command_at(
-                                sampler_index=sampler_index, command=command, data=data,
+                                sampler_index=sampler_index,
+                                command=command,
+                                data=data,
                             )
                         )
                 else:
@@ -500,7 +502,9 @@ def get_observations(self):
 
         List of observations for each of the unpaused tasks.
         """
-        return self.call(["get_observations"] * self.num_unpaused_tasks,)
+        return self.call(
+            ["get_observations"] * self.num_unpaused_tasks,
+        )
 
     def command_at(
         self, sampler_index: int, command: str, data: Optional[Any] = None
@@ -689,9 +693,9 @@ def pause_at(self, sampler_index: int) -> None:
         for i in range(
             sampler_index + 1, len(self.sampler_index_to_process_ind_and_subprocess_ind)
         ):
-            other_process_and_sub_process_inds = self.sampler_index_to_process_ind_and_subprocess_ind[
-                i
-            ]
+            other_process_and_sub_process_inds = (
+                self.sampler_index_to_process_ind_and_subprocess_ind[i]
+            )
             if other_process_and_sub_process_inds[0] == process_ind:
                 other_process_and_sub_process_inds[1] -= 1
             else:
@@ -988,9 +992,9 @@ def _task_sampling_loop_generator_fn(
                             )
                             if step_result.info is None:
                                 step_result = step_result.clone({"info": {}})
-                            step_result.info[
-                                COMPLETE_TASK_CALLBACK_KEY
-                            ] = task_callback_data
+                            step_result.info[COMPLETE_TASK_CALLBACK_KEY] = (
+                                task_callback_data
+                            )
 
                         if auto_resample_when_done:
                             current_task = task_sampler.next_task()
@@ -1140,7 +1144,9 @@ def get_observations(self):
 
         List of observations for each of the unpaused tasks.
         """
-        return self.call(["get_observations"] * self.num_unpaused_tasks,)
+        return self.call(
+            ["get_observations"] * self.num_unpaused_tasks,
+        )
 
     def next_task_at(self, index_process: int) -> List[RLStepResult]:
         """Move to the the next Task from the TaskSampler in index_process
diff --git a/allenact/base_abstractions/distributions.py b/allenact/base_abstractions/distributions.py
index 51a8662c8..2cfb98572 100644
--- a/allenact/base_abstractions/distributions.py
+++ b/allenact/base_abstractions/distributions.py
@@ -210,8 +210,7 @@ def log_prob(
 
 
 class TrackingCallback(Protocol):
-    def __call__(self, type: TrackingInfoType, info: Dict[str, Any], n: int):
-        ...
+    def __call__(self, type: TrackingInfoType, info: Dict[str, Any], n: int): ...
 
 
 class TeacherForcingDistr(Distr):
diff --git a/allenact/base_abstractions/misc.py b/allenact/base_abstractions/misc.py
index 07f239653..31e85895e 100644
--- a/allenact/base_abstractions/misc.py
+++ b/allenact/base_abstractions/misc.py
@@ -28,9 +28,11 @@ class RLStepResult(NamedTuple):
 
     def clone(self, new_info: Dict[str, Any]):
         return RLStepResult(
-            observation=self.observation
-            if "observation" not in new_info
-            else new_info["observation"],
+            observation=(
+                self.observation
+                if "observation" not in new_info
+                else new_info["observation"]
+            ),
             reward=self.reward if "reward" not in new_info else new_info["reward"],
             done=self.done if "done" not in new_info else new_info["done"],
             info=self.info if "info" not in new_info else new_info["info"],
@@ -38,9 +40,9 @@ def clone(self, new_info: Dict[str, Any]):
 
     def merge(self, other: "RLStepResult"):
         return RLStepResult(
-            observation=self.observation
-            if other.observation is None
-            else other.observation,
+            observation=(
+                self.observation if other.observation is None else other.observation
+            ),
             reward=self.reward if other.reward is None else other.reward,
             done=self.done if other.done is None else other.done,
             info={
@@ -328,11 +330,15 @@ def slice(
                 )
                 sliced_tensor = tensor[slice_tuple]
                 res.check_append(
-                    key=key, tensor=sliced_tensor, sampler_dim=self.sampler_dim(key),
+                    key=key,
+                    tensor=sliced_tensor,
+                    sampler_dim=self.sampler_dim(key),
                 )
             else:
                 res.check_append(
-                    key, tensor, self.sampler_dim(key),
+                    key,
+                    tensor,
+                    self.sampler_dim(key),
                 )
 
         return res
diff --git a/allenact/base_abstractions/sensor.py b/allenact/base_abstractions/sensor.py
index 2d7b9b101..ed317e5bf 100644
--- a/allenact/base_abstractions/sensor.py
+++ b/allenact/base_abstractions/sensor.py
@@ -185,7 +185,14 @@ def __init__(
         self.group_spaces = (
             self.action_space
             if self.use_groups
-            else OrderedDict([(self._NO_GROUPS_LABEL, self.action_space,)])
+            else OrderedDict(
+                [
+                    (
+                        self._NO_GROUPS_LABEL,
+                        self.action_space,
+                    )
+                ]
+            )
         )
 
         self.expert_args: Dict[str, Any] = expert_args or {}
@@ -230,7 +237,10 @@ def flagged_space(
         else:
             return gym.spaces.Dict(
                 [
-                    (group_space, cls.flagged_group_space(action_space[group_space]),)
+                    (
+                        group_space,
+                        cls.flagged_group_space(action_space[group_space]),
+                    )
                     for group_space in cast(gym.spaces.Dict, action_space)
                 ]
             )
@@ -270,7 +280,9 @@ def flatten_output(self, unflattened):
 
     @abc.abstractmethod
     def query_expert(
-        self, task: SubTaskType, expert_sensor_group_name: Optional[str],
+        self,
+        task: SubTaskType,
+        expert_sensor_group_name: Optional[str],
     ) -> Tuple[Any, bool]:
         """Query the expert for the given task (and optional group name).
 
diff --git a/allenact/embodiedai/aux_losses/losses.py b/allenact/embodiedai/aux_losses/losses.py
index 1dee664d0..7ea8b29ac 100644
--- a/allenact/embodiedai/aux_losses/losses.py
+++ b/allenact/embodiedai/aux_losses/losses.py
@@ -114,7 +114,10 @@ def get_aux_loss(
 
 
 def _propagate_final_beliefs_to_all_steps(
-    beliefs: torch.Tensor, masks: torch.Tensor, num_sampler: int, num_steps: int,
+    beliefs: torch.Tensor,
+    masks: torch.Tensor,
+    num_sampler: int,
+    num_steps: int,
 ):
     final_beliefs = torch.zeros_like(beliefs)  # (T, B, *)
     start_locs_list = []
@@ -180,7 +183,10 @@ def get_aux_loss(
         masks = masks.squeeze(-1)  # (T, B)
 
         final_beliefs, _, _ = _propagate_final_beliefs_to_all_steps(
-            beliefs, masks, num_sampler, num_steps,
+            beliefs,
+            masks,
+            num_sampler,
+            num_steps,
         )
 
         ## compute CE loss
@@ -236,7 +242,9 @@ def get_aux_loss(
 
         return (
             avg_loss,
-            {"total": cast(torch.Tensor, avg_loss).item(),},
+            {
+                "total": cast(torch.Tensor, avg_loss).item(),
+            },
         )
 
 
@@ -275,7 +283,10 @@ def get_aux_loss(
             start_locs_list,
             end_locs_list,
         ) = _propagate_final_beliefs_to_all_steps(
-            beliefs, masks, num_sampler, num_steps,
+            beliefs,
+            masks,
+            num_sampler,
+            num_steps,
         )
 
         ## also find the locs_batch of shape (M, 3)
@@ -353,7 +364,9 @@ def get_aux_loss(
 
         return (
             avg_loss,
-            {"total": cast(torch.Tensor, avg_loss).item(),},
+            {
+                "total": cast(torch.Tensor, avg_loss).item(),
+            },
         )
 
 
@@ -502,21 +515,21 @@ def get_aux_loss(
             beliefs.device
         )  # (T+k, k, N, 1)
 
-        pred_masks[
-            num_steps - 1 :
-        ] = False  # GRU(b_t, a_{t:t+k-1}) is invalid when t >= T, as we don't have real z_{t+1}
+        pred_masks[num_steps - 1 :] = (
+            False  # GRU(b_t, a_{t:t+k-1}) is invalid when t >= T, as we don't have real z_{t+1}
+        )
         for j in range(1, self.planning_steps + 1):  # for j-step predictions
-            pred_masks[
-                : j - 1, j - 1
-            ] = False  # Remove the upper triangle above the diagnonal (but I think this is unnecessary for valid_masks)
+            pred_masks[: j - 1, j - 1] = (
+                False  # Remove the upper triangle above the diagnonal (but I think this is unnecessary for valid_masks)
+            )
             for n in range(num_sampler):
                 has_zeros_batch = torch.where(masks[:, n] == 0)[0]
                 # in j-step prediction, timesteps z -> z + j are disallowed as those are the first j timesteps of a new episode
                 # z-> z-1 because of pred_masks being offset by 1
                 for z in has_zeros_batch:
-                    pred_masks[
-                        z - 1 : z - 1 + j, j - 1, n
-                    ] = False  # can affect j timesteps
+                    pred_masks[z - 1 : z - 1 + j, j - 1, n] = (
+                        False  # can affect j timesteps
+                    )
 
         # instead of the whole range, we actually are only comparing a window i:i+k for each query/target i - for each, select the appropriate k
         # we essentially gather diagonals from this full mask, t of them, k long
@@ -682,7 +695,9 @@ def get_aux_loss(
 
         return (
             avg_multi_class_loss,
-            {"total": cast(torch.Tensor, avg_multi_class_loss).item(),},
+            {
+                "total": cast(torch.Tensor, avg_multi_class_loss).item(),
+            },
         )
 
 
diff --git a/allenact/embodiedai/mapping/mapping_losses.py b/allenact/embodiedai/mapping/mapping_losses.py
index 05138b1ff..aa5658c1d 100644
--- a/allenact/embodiedai/mapping/mapping_losses.py
+++ b/allenact/embodiedai/mapping/mapping_losses.py
@@ -14,7 +14,9 @@ class BinnedPointCloudMapLoss(AbstractActorCriticLoss):
     prediction."""
 
     def __init__(
-        self, binned_pc_uuid: str, map_logits_uuid: str,
+        self,
+        binned_pc_uuid: str,
+        map_logits_uuid: str,
     ):
         """Initializer.
 
@@ -135,8 +137,8 @@ def loss(  # type: ignore
 
         ego_map_gt = ego_map_gt.float()
         total_loss = -(
-            ego_map_gt * (log_p * (one_minus_p ** self.gamma))
-            + (1 - ego_map_gt) * (log_one_minus_p * (p ** self.gamma))
+            ego_map_gt * (log_p * (one_minus_p**self.gamma))
+            + (1 - ego_map_gt) * (log_one_minus_p * (p**self.gamma))
         ).mean()
 
         return (
diff --git a/allenact/embodiedai/mapping/mapping_models/active_neural_slam.py b/allenact/embodiedai/mapping/mapping_models/active_neural_slam.py
index 2fca9cd88..44794e151 100644
--- a/allenact/embodiedai/mapping/mapping_models/active_neural_slam.py
+++ b/allenact/embodiedai/mapping/mapping_models/active_neural_slam.py
@@ -142,7 +142,10 @@ def __init__(
             ), "When using layernorm, we require that set `freeze_resnet_batchnorm` to True."
             self.resnet_normalizer = nn.Sequential(
                 nn.Conv2d(512, 512, 1),
-                nn.LayerNorm(normalized_shape=[512, 7, 7], elementwise_affine=True,),
+                nn.LayerNorm(
+                    normalized_shape=[512, 7, 7],
+                    elementwise_affine=True,
+                ),
             )
             self.resnet_normalizer.apply(simple_conv_and_linear_weights_init)
         else:
@@ -305,13 +308,18 @@ def allocentric_map_to_egocentric_view(
                 1
             ).to(self.device)
             rotation_and_translate_mat = torch.cat(
-                (rot_mat, offset_to_top_of_image + offset_to_center_the_agent,), dim=-1,
+                (
+                    rot_mat,
+                    offset_to_top_of_image + offset_to_center_the_agent,
+                ),
+                dim=-1,
             )
 
             ego_map = F.grid_sample(
                 allocentric_map,
                 F.affine_grid(
-                    rotation_and_translate_mat.to(self.device), allocentric_map.shape,
+                    rotation_and_translate_mat.to(self.device),
+                    allocentric_map.shape,
                 ),
                 padding_mode=padding_mode,
                 align_corners=False,
@@ -353,7 +361,8 @@ def estimate_egocentric_dx_dz_dr(
 
     @staticmethod
     def update_allocentric_xzrs_with_egocentric_movement(
-        last_xzrs_allocentric: torch.Tensor, dx_dz_drs_egocentric: torch.Tensor,
+        last_xzrs_allocentric: torch.Tensor,
+        dx_dz_drs_egocentric: torch.Tensor,
     ):
         new_xzrs_allocentric = last_xzrs_allocentric.clone()
 
@@ -476,14 +485,18 @@ def forward(
             )
 
         if self.use_pose_estimation:
-            updated_xzrs_allocentrc = self.update_allocentric_xzrs_with_egocentric_movement(
-                last_xzrs_allocentric=last_xzrs_allocentric,
-                dx_dz_drs_egocentric=dx_dz_dr_egocentric_preds,
+            updated_xzrs_allocentrc = (
+                self.update_allocentric_xzrs_with_egocentric_movement(
+                    last_xzrs_allocentric=last_xzrs_allocentric,
+                    dx_dz_drs_egocentric=dx_dz_dr_egocentric_preds,
+                )
             )
         elif dx_dz_drs_egocentric is not None:
-            updated_xzrs_allocentrc = self.update_allocentric_xzrs_with_egocentric_movement(
-                last_xzrs_allocentric=last_xzrs_allocentric,
-                dx_dz_drs_egocentric=dx_dz_drs_egocentric,
+            updated_xzrs_allocentrc = (
+                self.update_allocentric_xzrs_with_egocentric_movement(
+                    last_xzrs_allocentric=last_xzrs_allocentric,
+                    dx_dz_drs_egocentric=dx_dz_drs_egocentric,
+                )
             )
         else:
             updated_xzrs_allocentrc = None
@@ -495,11 +508,13 @@ def forward(
             with torch.no_grad():
                 # Rotate and translate the egocentric map view, we do this grid sampling
                 # at the level of probabilities as bad results can occur at the logit level
-                full_size_allocentric_map_probs_update = _move_egocentric_map_view_into_allocentric_position(
-                    map_probs_egocentric=map_probs_egocentric,
-                    xzrs_allocentric=updated_xzrs_allocentrc,
-                    allocentric_map_height_width=(self.map_size, self.map_size),
-                    resolution_in_cm=self.resolution_in_cm,
+                full_size_allocentric_map_probs_update = (
+                    _move_egocentric_map_view_into_allocentric_position(
+                        map_probs_egocentric=map_probs_egocentric,
+                        xzrs_allocentric=updated_xzrs_allocentrc,
+                        allocentric_map_height_width=(self.map_size, self.map_size),
+                        resolution_in_cm=self.resolution_in_cm,
+                    )
                 )
 
                 map_probs_allocentric = torch.max(
@@ -575,7 +590,7 @@ def _move_egocentric_map_view_into_allocentric_position(
     )
     allo_h, allo_w = allocentric_map_height_width
 
-    max_view_range = math.sqrt((ego_w / 2.0) ** 2 + ego_h ** 2)
+    max_view_range = math.sqrt((ego_w / 2.0) ** 2 + ego_h**2)
     if min(allo_h, allo_w) / 2.0 < max_view_range:
         raise NotImplementedError(
             f"The shape of your egocentric view (ego_h, ego_w)==({ego_h, ego_w})"
diff --git a/allenact/embodiedai/mapping/mapping_utils/map_builders.py b/allenact/embodiedai/mapping/mapping_utils/map_builders.py
index fd5840151..188ddcedc 100644
--- a/allenact/embodiedai/mapping/mapping_utils/map_builders.py
+++ b/allenact/embodiedai/mapping/mapping_utils/map_builders.py
@@ -240,7 +240,10 @@ def update(
                     scaler
                     * (
                         torch.tensor(
-                            [camera_xyz[0], camera_xyz[2],],
+                            [
+                                camera_xyz[0],
+                                camera_xyz[2],
+                            ],
                             dtype=torch.float,
                             device=self.device,
                         ).unsqueeze(-1)
@@ -252,7 +255,10 @@ def update(
                     [0, 1.0]
                 ).unsqueeze(1).to(self.device)
                 rotation_and_translate_mat = torch.cat(
-                    (rot_mat, offset_to_top_of_image + offset_to_center_the_agent,),
+                    (
+                        rot_mat,
+                        offset_to_top_of_image + offset_to_center_the_agent,
+                    ),
                     dim=1,
                 )
 
@@ -283,9 +289,9 @@ def update(
                     :vr, (width_div_2 - vr_div_2) : (width_div_2 + vr_div_2), :
                 ]
 
-                to_return[
-                    "egocentric_local_context"
-                ] = egocentric_local_context.cpu().numpy()
+                to_return["egocentric_local_context"] = (
+                    egocentric_local_context.cpu().numpy()
+                )
 
             return to_return
 
@@ -437,15 +443,15 @@ def build_ground_truth_map(self, object_hulls: Sequence[ObjectHull2d]):
             if ot in self.object_type_to_index:
                 ind = self.object_type_to_index[ot]
 
-                self.ground_truth_semantic_map[
-                    :, :, ind : (ind + 1)
-                ] = cv2.fillConvexPoly(
-                    img=np.array(
-                        self.ground_truth_semantic_map[:, :, ind : (ind + 1)],
-                        dtype=np.uint8,
-                    ),
-                    points=self._xzs_to_colrows(np.array(object_hull.hull_points)),
-                    color=255,
+                self.ground_truth_semantic_map[:, :, ind : (ind + 1)] = (
+                    cv2.fillConvexPoly(
+                        img=np.array(
+                            self.ground_truth_semantic_map[:, :, ind : (ind + 1)],
+                            dtype=np.uint8,
+                        ),
+                        points=self._xzs_to_colrows(np.array(object_hull.hull_points)),
+                        color=255,
+                    )
                 )
 
     def update(
@@ -556,7 +562,11 @@ def update(
                 1
             ).to(self.device)
             rotation_and_translate_mat = torch.cat(
-                (rot_mat, offset_to_top_of_image + offset_to_center_the_agent,), dim=1,
+                (
+                    rot_mat,
+                    offset_to_top_of_image + offset_to_center_the_agent,
+                ),
+                dim=1,
             )
 
             ego_update_and_mask = F.grid_sample(
diff --git a/allenact/embodiedai/mapping/mapping_utils/point_cloud_utils.py b/allenact/embodiedai/mapping/mapping_utils/point_cloud_utils.py
index 1d1722c7f..b7e6e0509 100644
--- a/allenact/embodiedai/mapping/mapping_utils/point_cloud_utils.py
+++ b/allenact/embodiedai/mapping/mapping_utils/point_cloud_utils.py
@@ -292,7 +292,8 @@ def project_point_cloud_to_map(
 
     isvalid = torch.logical_and(
         torch.logical_and(
-            (uvw_points_binned >= 0).all(-1), (uvw_points_binned < maxes).all(-1),
+            (uvw_points_binned >= 0).all(-1),
+            (uvw_points_binned < maxes).all(-1),
         ),
         isnotnan,
     )
diff --git a/allenact/embodiedai/models/basic_models.py b/allenact/embodiedai/models/basic_models.py
index 4df7267c7..3db2e1567 100644
--- a/allenact/embodiedai/models/basic_models.py
+++ b/allenact/embodiedai/models/basic_models.py
@@ -1,5 +1,6 @@
 """Basic building block torch networks that can be used across a variety of
 tasks."""
+
 from typing import (
     Sequence,
     Dict,
@@ -475,7 +476,8 @@ def adapt_result(
         nsamplers: int,
         nagents: int,
     ) -> Tuple[
-        torch.FloatTensor, torch.FloatTensor,
+        torch.FloatTensor,
+        torch.FloatTensor,
     ]:
         output_dims = (nsteps, nsamplers) + ((nagents, -1) if obs_agent else (-1,))
         hidden_dims = (self.num_recurrent_layers, nsamplers) + (
@@ -483,7 +485,10 @@ def adapt_result(
         )
 
         outputs = cast(torch.FloatTensor, outputs.view(*output_dims))
-        hidden_states = cast(torch.FloatTensor, hidden_states.view(*hidden_dims),)
+        hidden_states = cast(
+            torch.FloatTensor,
+            hidden_states.view(*hidden_dims),
+        )
 
         return outputs, hidden_states
 
diff --git a/allenact/embodiedai/models/fusion_models.py b/allenact/embodiedai/models/fusion_models.py
index e93fc6f74..3dd54b4e7 100644
--- a/allenact/embodiedai/models/fusion_models.py
+++ b/allenact/embodiedai/models/fusion_models.py
@@ -36,7 +36,8 @@ def forward(
         obs_embeds = obs_embeds.view(num_steps * num_samplers, -1)
 
         weights = self.get_belief_weights(
-            all_beliefs=all_beliefs, obs_embeds=obs_embeds,  # (T*N, H, K)  # (T*N, Z)
+            all_beliefs=all_beliefs,
+            obs_embeds=obs_embeds,  # (T*N, H, K)  # (T*N, Z)
         ).unsqueeze(
             -1
         )  # (T*N, K, 1)
diff --git a/allenact/embodiedai/models/resnet.py b/allenact/embodiedai/models/resnet.py
index 8a2c76a2f..0620c6faa 100644
--- a/allenact/embodiedai/models/resnet.py
+++ b/allenact/embodiedai/models/resnet.py
@@ -39,7 +39,13 @@ class BasicBlock(nn.Module):
     resneXt = False
 
     def __init__(
-        self, inplanes, planes, ngroups, stride=1, downsample=None, cardinality=1,
+        self,
+        inplanes,
+        planes,
+        ngroups,
+        stride=1,
+        downsample=None,
+        cardinality=1,
     ):
         super(BasicBlock, self).__init__()
         self.convs = nn.Sequential(
@@ -105,11 +111,22 @@ class Bottleneck(nn.Module):
     resneXt = False
 
     def __init__(
-        self, inplanes, planes, ngroups, stride=1, downsample=None, cardinality=1,
+        self,
+        inplanes,
+        planes,
+        ngroups,
+        stride=1,
+        downsample=None,
+        cardinality=1,
     ):
         super().__init__()
         self.convs = _build_bottleneck_branch(
-            inplanes, planes, ngroups, stride, self.expansion, groups=cardinality,
+            inplanes,
+            planes,
+            ngroups,
+            stride,
+            self.expansion,
+            groups=cardinality,
         )
         self.relu = nn.ReLU(inplace=True)
         self.downsample = downsample
@@ -130,7 +147,13 @@ def forward(self, x):
 
 class SEBottleneck(Bottleneck):
     def __init__(
-        self, inplanes, planes, ngroups, stride=1, downsample=None, cardinality=1,
+        self,
+        inplanes,
+        planes,
+        ngroups,
+        stride=1,
+        downsample=None,
+        cardinality=1,
     ):
         super().__init__(inplanes, planes, ngroups, stride, downsample, cardinality)
 
@@ -192,7 +215,7 @@ def __init__(self, in_channels, base_planes, ngroups, block, layers, cardinality
         )
 
         self.final_channels = self.inplanes
-        self.final_spatial_compress = 1.0 / (2 ** 5)
+        self.final_spatial_compress = 1.0 / (2**5)
 
     def _make_layer(self, block, ngroups, planes, blocks, stride=1):
         downsample = None
@@ -337,7 +360,7 @@ def __init__(
             )  # fix bug in habitat that uses int()
             after_compression_flat_size = 2048
             num_compression_channels = int(
-                round(after_compression_flat_size / (final_spatial ** 2))
+                round(after_compression_flat_size / (final_spatial**2))
             )
             self.compression = nn.Sequential(
                 nn.Conv2d(
@@ -415,8 +438,21 @@ def forward(self, observations):
         x = self.head(x)  # (2048) -> (hidden_size)
 
         if nagents is not None:
-            x = x.reshape((nsteps, nsamplers, nagents,) + x.shape[1:])
+            x = x.reshape(
+                (
+                    nsteps,
+                    nsamplers,
+                    nagents,
+                )
+                + x.shape[1:]
+            )
         else:
-            x = x.reshape((nsteps, nsamplers,) + x.shape[1:])
+            x = x.reshape(
+                (
+                    nsteps,
+                    nsamplers,
+                )
+                + x.shape[1:]
+            )
 
         return x
diff --git a/allenact/embodiedai/models/visual_nav_models.py b/allenact/embodiedai/models/visual_nav_models.py
index e8804ef0e..345f0fb83 100644
--- a/allenact/embodiedai/models/visual_nav_models.py
+++ b/allenact/embodiedai/models/visual_nav_models.py
@@ -180,7 +180,9 @@ def forward_encoder(self, observations: ObservationType) -> torch.FloatTensor:
         raise NotImplementedError("Obs Encoder Not Implemented")
 
     def fuse_beliefs(
-        self, beliefs_dict: Dict[str, torch.FloatTensor], obs_embeds: torch.FloatTensor,
+        self,
+        beliefs_dict: Dict[str, torch.FloatTensor],
+        obs_embeds: torch.FloatTensor,
     ) -> Tuple[torch.FloatTensor, Optional[torch.FloatTensor]]:
         all_beliefs = torch.stack(list(beliefs_dict.values()), dim=-1)  # (T, N, H, k)
 
diff --git a/allenact/embodiedai/storage/vdr_storage.py b/allenact/embodiedai/storage/vdr_storage.py
index b4abcea37..fe7b3dbfe 100644
--- a/allenact/embodiedai/storage/vdr_storage.py
+++ b/allenact/embodiedai/storage/vdr_storage.py
@@ -50,7 +50,9 @@ def loss(
         stream_memory: Memory,
     ) -> LossOutput:
         action_logits = self.compute_action_logits_fn(
-            model=model, img0=batch[self.img0_key], img1=batch[self.img1_key],
+            model=model,
+            img0=batch[self.img0_key],
+            img1=batch[self.img1_key],
         )
         loss = F.cross_entropy(action_logits, target=batch[self.action_key])
         return LossOutput(
@@ -163,9 +165,9 @@ def add(
             for i, (a, m, action_success) in enumerate(
                 zip(actions, masks, action_successes)
             ):
-                actions_already_sampled_in_ep = self.task_sampler_to_actions_already_sampled[
-                    i
-                ]
+                actions_already_sampled_in_ep = (
+                    self.task_sampler_to_actions_already_sampled[i]
+                )
 
                 if (
                     m != 0
diff --git a/allenact/main.py b/allenact/main.py
index d1ad6d0b1..138b5c6f1 100755
--- a/allenact/main.py
+++ b/allenact/main.py
@@ -31,7 +31,8 @@ def get_argument_parser():
 
     # noinspection PyTypeChecker
     parser = argparse.ArgumentParser(
-        description="allenact", formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        description="allenact",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
 
     parser.add_argument(
@@ -98,7 +99,12 @@ def get_argument_parser():
     )
 
     parser.add_argument(
-        "-s", "--seed", required=False, default=None, type=int, help="random seed",
+        "-s",
+        "--seed",
+        required=False,
+        default=None,
+        type=int,
+        help="random seed",
     )
     parser.add_argument(
         "-b",
diff --git a/allenact/setup.py b/allenact/setup.py
index 91f9389d3..a3075cabd 100644
--- a/allenact/setup.py
+++ b/allenact/setup.py
@@ -110,7 +110,7 @@ def _do_setup():
         license="MIT",
         packages=find_packages(include=["allenact", "allenact.*"]),
         install_requires=[
-            "gym==0.17.*", # Newer versions of gym are now broken with updates to setuptools
+            "gym==0.17.*",  # Newer versions of gym are now broken with updates to setuptools
             "torch>=1.6.0,!=1.8.0",
             "torchvision>=0.7.0",
             "tensorboardx>=2.1",
diff --git a/allenact/utils/experiment_utils.py b/allenact/utils/experiment_utils.py
index 4a16241b9..e165dc135 100644
--- a/allenact/utils/experiment_utils.py
+++ b/allenact/utils/experiment_utils.py
@@ -1,4 +1,5 @@
 """Utility classes and functions for running and designing experiments."""
+
 import abc
 import collections.abc
 import copy
@@ -248,9 +249,9 @@ def __init__(
         self.mode = mode
 
         self.training_steps: int = training_steps
-        self.storage_uuid_to_total_experiences: Dict[
-            str, int
-        ] = storage_uuid_to_total_experiences
+        self.storage_uuid_to_total_experiences: Dict[str, int] = (
+            storage_uuid_to_total_experiences
+        )
         self.pipeline_stage = pipeline_stage
 
         self.metrics_tracker = ScalarMeanTracker()
@@ -431,7 +432,10 @@ class EarlyStoppingCriterion(abc.ABC):
 
     @abc.abstractmethod
     def __call__(
-        self, stage_steps: int, total_steps: int, training_metrics: ScalarMeanTracker,
+        self,
+        stage_steps: int,
+        total_steps: int,
+        training_metrics: ScalarMeanTracker,
     ) -> bool:
         """Returns `True` if training should be stopped early.
 
@@ -451,7 +455,10 @@ class NeverEarlyStoppingCriterion(EarlyStoppingCriterion):
     """Implementation of `EarlyStoppingCriterion` which never stops early."""
 
     def __call__(
-        self, stage_steps: int, total_steps: int, training_metrics: ScalarMeanTracker,
+        self,
+        stage_steps: int,
+        total_steps: int,
+        training_metrics: ScalarMeanTracker,
     ) -> bool:
         return False
 
@@ -1029,10 +1036,12 @@ def before_rollout(self, train_metrics: Optional[ScalarMeanTracker] = None) -> b
             train_metrics is not None
             and self.current_stage.early_stopping_criterion is not None
         ):
-            self.current_stage.early_stopping_criterion_met = self.current_stage.early_stopping_criterion(
-                stage_steps=self.current_stage.steps_taken_in_stage,
-                total_steps=self.total_steps,
-                training_metrics=train_metrics,
+            self.current_stage.early_stopping_criterion_met = (
+                self.current_stage.early_stopping_criterion(
+                    stage_steps=self.current_stage.steps_taken_in_stage,
+                    total_steps=self.total_steps,
+                    training_metrics=train_metrics,
+                )
             )
         if self.current_stage.early_stopping_criterion_met:
             get_logger().debug(
@@ -1124,7 +1133,8 @@ def get_stage_storage(
         for storage_uuid in storage_uuids_for_current_stage:
             if isinstance(self._named_storages[storage_uuid], Builder):
                 self._named_storages[storage_uuid] = cast(
-                    Builder["ExperienceStorage"], self._named_storages[storage_uuid],
+                    Builder["ExperienceStorage"],
+                    self._named_storages[storage_uuid],
                 )()
 
         return OrderedDict(
diff --git a/allenact/utils/misc_utils.py b/allenact/utils/misc_utils.py
index 07df4e2a4..bef37d61b 100644
--- a/allenact/utils/misc_utils.py
+++ b/allenact/utils/misc_utils.py
@@ -41,7 +41,8 @@ def multiprocessing_safe_download_file_from_url(url: str, save_path: str):
         if not os.path.isfile(save_path):
             get_logger().info(f"Downloading file from {url} to {save_path}.")
             urllib.request.urlretrieve(
-                url, save_path,
+                url,
+                save_path,
             )
         else:
             get_logger().debug(f"{save_path} exists - skipping download.")
@@ -127,7 +128,10 @@ def tensor_print_options(**print_opts):
 
 
 def md5_hash_str_as_int(to_hash: str):
-    return int(hashlib.md5(to_hash.encode()).hexdigest(), 16,)
+    return int(
+        hashlib.md5(to_hash.encode()).hexdigest(),
+        16,
+    )
 
 
 def get_git_diff_of_project() -> Tuple[str, str]:
diff --git a/allenact/utils/model_utils.py b/allenact/utils/model_utils.py
index aa3cbe2d0..04b201db5 100644
--- a/allenact/utils/model_utils.py
+++ b/allenact/utils/model_utils.py
@@ -1,4 +1,5 @@
 """Functions used to initialize and manipulate pytorch models."""
+
 import hashlib
 from typing import Sequence, Tuple, Union, Optional, Dict, Any, Callable
 
@@ -18,7 +19,12 @@ def md5_hash_of_state_dict(state_dict: Dict[str, Any]):
                 p1 = piece[1].data.cpu().numpy()
             else:
                 p1 = piece[1]
-            hashables.append(int(hashlib.md5(p1.tobytes()).hexdigest(), 16,))
+            hashables.append(
+                int(
+                    hashlib.md5(p1.tobytes()).hexdigest(),
+                    16,
+                )
+            )
         else:
             hashables.append(md5_hash_str_as_int(str(piece)))
 
@@ -182,10 +188,21 @@ def compute_cnn_output(
 
     if nagents is not None:
         cnn_output = cnn_output.reshape(
-            (nsteps, nsamplers, nagents,) + cnn_output.shape[1:]
+            (
+                nsteps,
+                nsamplers,
+                nagents,
+            )
+            + cnn_output.shape[1:]
         )
     else:
-        cnn_output = cnn_output.reshape((nsteps, nsamplers,) + cnn_output.shape[1:])
+        cnn_output = cnn_output.reshape(
+            (
+                nsteps,
+                nsamplers,
+            )
+            + cnn_output.shape[1:]
+        )
 
     return cnn_output
 
@@ -233,7 +250,13 @@ def __init__(self, input_size, output_size):
             self.fc = nn.Embedding(input_size, output_size)
         else:  # automatically be moved to a device
             self.null_embedding: torch.Tensor
-            self.register_buffer("null_embedding", torch.zeros(0,), persistent=False)
+            self.register_buffer(
+                "null_embedding",
+                torch.zeros(
+                    0,
+                ),
+                persistent=False,
+            )
 
     def forward(self, inputs):
         if self.output_size != 0:
diff --git a/allenact/utils/spaces_utils.py b/allenact/utils/spaces_utils.py
index bd54edc9a..46593880a 100644
--- a/allenact/utils/spaces_utils.py
+++ b/allenact/utils/spaces_utils.py
@@ -167,12 +167,16 @@ def flatten_space(space: gym.Space):
     if isinstance(space, gym.MultiBinary):
         return gym.Box(low=0, high=1, shape=(space.n,))
     if isinstance(space, gym.MultiDiscrete):
-        return gym.Box(low=np.zeros_like(space.nvec), high=space.nvec,)
+        return gym.Box(
+            low=np.zeros_like(space.nvec),
+            high=space.nvec,
+        )
     raise NotImplementedError
 
 
 def policy_space(
-    action_space: gym.Space, box_space_to_policy: Callable[[gym.Box], gym.Space] = None,
+    action_space: gym.Space,
+    box_space_to_policy: Callable[[gym.Box], gym.Space] = None,
 ) -> gym.Space:
     if isinstance(action_space, gym.Box):
         if box_space_to_policy is None:
@@ -192,7 +196,10 @@ def policy_space(
     if isinstance(action_space, gym.Dict):
         # policy = dict of sub-policies
         spaces = [
-            (name, policy_space(s, box_space_to_policy),)
+            (
+                name,
+                policy_space(s, box_space_to_policy),
+            )
             for name, s in action_space.spaces.items()
         ]
         return gym.Dict(spaces)
diff --git a/allenact/utils/system.py b/allenact/utils/system.py
index 25bb065ba..a201cf994 100644
--- a/allenact/utils/system.py
+++ b/allenact/utils/system.py
@@ -170,7 +170,10 @@ def _set_log_formatter():
             datefmt = short_date_format
 
         if add_style_to_logs:
-            formatter = ColoredFormatter(fmt=fmt, datefmt=datefmt,)
+            formatter = ColoredFormatter(
+                fmt=fmt,
+                datefmt=datefmt,
+            )
         else:
             formatter = logging.Formatter(fmt=fmt, datefmt=datefmt)
 
diff --git a/allenact/utils/viz_utils.py b/allenact/utils/viz_utils.py
index a119af5b0..be5b34470 100644
--- a/allenact/utils/viz_utils.py
+++ b/allenact/utils/viz_utils.py
@@ -334,7 +334,9 @@ def __init__(
         **other_base_kwargs,
     ):
         super().__init__(
-            label, vector_task_sources=[vector_task_source], **other_base_kwargs,
+            label,
+            vector_task_sources=[vector_task_source],
+            **other_base_kwargs,
         )
         self.max_clip_length = max_clip_length
         self.max_video_length = max_video_length
@@ -388,7 +390,9 @@ def log(
             vid = self.make_vid(images)
             if vid is not None:
                 log_writer.add_vid(
-                    f"{self.mode}/{self.label}_group{page}", vid, global_step=num_steps,
+                    f"{self.mode}/{self.label}_group{page}",
+                    vid,
+                    global_step=num_steps,
                 )
 
     @staticmethod
@@ -728,9 +732,9 @@ def __init__(
             self.actor_critic_source,
         ) = self._setup_sources()
 
-        self.data: Dict[
-            str, List[Dict]
-        ] = {}  # dict of episode id to list of dicts with collected data
+        self.data: Dict[str, List[Dict]] = (
+            {}
+        )  # dict of episode id to list of dicts with collected data
         self.last_it2epid: List[str] = []
 
     def _setup_sources(self):
@@ -910,7 +914,9 @@ def _collect_rollout(self, rollout, alive):
 
                 # Select latest step
                 res = res.narrow(
-                    dim=0, start=rollout_step, length=1,  # step dimension
+                    dim=0,
+                    start=rollout_step,
+                    length=1,  # step dimension
                 )  # 1 x ... x sampler x ...
 
                 # get_logger().debug("basic collect h {}".format(res[..., 0]))
@@ -1056,7 +1062,10 @@ def __init__(
         self.experiment_to_test_events_paths_map = experiment_to_test_events_paths_map
         train_experiments = set(list(experiment_to_train_events_paths_map.keys()))
         test_experiments = set(list(experiment_to_test_events_paths_map.keys()))
-        assert (train_experiments - test_experiments) in [set(), train_experiments,], (
+        assert (train_experiments - test_experiments) in [
+            set(),
+            train_experiments,
+        ], (
             f"`experiment_to_test_events_paths_map` must have identical keys (experiment names) to those"
             f" in `experiment_to_train_events_paths_map`, or be empty."
             f" Got {train_experiments} train keys and {test_experiments} test keys."
diff --git a/allenact_plugins/babyai_plugin/babyai_models.py b/allenact_plugins/babyai_plugin/babyai_models.py
index 5d7a1dd5b..1d81169be 100644
--- a/allenact_plugins/babyai_plugin/babyai_models.py
+++ b/allenact_plugins/babyai_plugin/babyai_models.py
@@ -191,13 +191,13 @@ def forward_loop(
                     for sampler_needing_reset_ind in time_ind_to_which_need_instr_reset[
                         time_ind
                     ]:
-                        current_instr_embeddings_list[
-                            sampler_needing_reset_ind
-                        ] = unique_instr_embeddings[
-                            reset_multi_ind_to_index[
-                                (time_ind, sampler_needing_reset_ind)
+                        current_instr_embeddings_list[sampler_needing_reset_ind] = (
+                            unique_instr_embeddings[
+                                reset_multi_ind_to_index[
+                                    (time_ind, sampler_needing_reset_ind)
+                                ]
                             ]
-                        ]
+                        )
 
                     instr_embeddings_list.append(
                         torch.stack(current_instr_embeddings_list, dim=0)
@@ -233,16 +233,20 @@ def forward_loop(
         }
         return (
             ActorCriticOutput(
-                distributions=CategoricalDistr(logits=self.actor(embedding),),
+                distributions=CategoricalDistr(
+                    logits=self.actor(embedding),
+                ),
                 values=self.critic(embedding),
-                extras=extra_predictions
-                if not self.include_auxiliary_head
-                else {
-                    **extra_predictions,
-                    "auxiliary_distributions": cast(
-                        Any, CategoricalDistr(logits=self.aux(embedding))
-                    ),
-                },
+                extras=(
+                    extra_predictions
+                    if not self.include_auxiliary_head
+                    else {
+                        **extra_predictions,
+                        "auxiliary_distributions": cast(
+                            Any, CategoricalDistr(logits=self.aux(embedding))
+                        ),
+                    }
+                ),
             ),
             torch.stack([r["memory"] for r in results], dim=0),
         )
@@ -348,13 +352,13 @@ def forward(
                     for sampler_needing_reset_ind in time_ind_to_which_need_instr_reset[
                         time_ind
                     ]:
-                        current_instr_embeddings_list[
-                            sampler_needing_reset_ind
-                        ] = unique_instr_embeddings[
-                            reset_multi_ind_to_index[
-                                (time_ind, sampler_needing_reset_ind)
+                        current_instr_embeddings_list[sampler_needing_reset_ind] = (
+                            unique_instr_embeddings[
+                                reset_multi_ind_to_index[
+                                    (time_ind, sampler_needing_reset_ind)
+                                ]
                             ]
-                        ]
+                        )
 
                     instr_embeddings_list.append(
                         torch.stack(current_instr_embeddings_list, dim=0)
@@ -436,14 +440,20 @@ def forward(
         embedding = embedding.view(rollouts_len * nsamplers, -1)
 
         ac_output = ActorCriticOutput(
-            distributions=CategoricalDistr(logits=self.actor(embedding),),
+            distributions=CategoricalDistr(
+                logits=self.actor(embedding),
+            ),
             values=self.critic(embedding),
-            extras=extra_predictions
-            if not self.include_auxiliary_head
-            else {
-                **extra_predictions,
-                "auxiliary_distributions": CategoricalDistr(logits=self.aux(embedding)),
-            },
+            extras=(
+                extra_predictions
+                if not self.include_auxiliary_head
+                else {
+                    **extra_predictions,
+                    "auxiliary_distributions": CategoricalDistr(
+                        logits=self.aux(embedding)
+                    ),
+                }
+            ),
         )
         hidden_states = memory
 
@@ -582,7 +592,10 @@ def __init__(
         self.include_auxiliary_head = include_auxiliary_head
 
         self.baby_ai_model = BabyAIACModelWrapped(
-            obs_space={"image": 7 * 7 * 3, "instr": 100,},
+            obs_space={
+                "image": 7 * 7 * 3,
+                "instr": 100,
+            },
             action_space=action_space,
             image_dim=image_dim,
             memory_dim=memory_dim,
diff --git a/allenact_plugins/babyai_plugin/babyai_tasks.py b/allenact_plugins/babyai_plugin/babyai_tasks.py
index 9a239cb04..6fe9e26aa 100644
--- a/allenact_plugins/babyai_plugin/babyai_tasks.py
+++ b/allenact_plugins/babyai_plugin/babyai_tasks.py
@@ -173,7 +173,7 @@ def __init__(
         else:
             self.env = env_builder()
 
-        self.np_seeded_random_gen, _ = seeding.np_random(random.randint(0, 2 ** 31 - 1))
+        self.np_seeded_random_gen, _ = seeding.np_random(random.randint(0, 2**31 - 1))
         self.num_tasks_generated = 0
 
     @property
@@ -206,7 +206,7 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[BabyAITask]:
                     self.task_seeds_list
                 )
         else:
-            self._last_env_seed = self.np_seeded_random_gen.randint(0, 2 ** 31 - 1)
+            self._last_env_seed = self.np_seeded_random_gen.randint(0, 2**31 - 1)
 
         self.env.seed(self._last_env_seed)
         self.env.saved_seed = self._last_env_seed
diff --git a/allenact_plugins/clip_plugin/clip_preprocessors.py b/allenact_plugins/clip_plugin/clip_preprocessors.py
index 0d6468c1b..50a01db4c 100644
--- a/allenact_plugins/clip_plugin/clip_preprocessors.py
+++ b/allenact_plugins/clip_plugin/clip_preprocessors.py
@@ -124,9 +124,7 @@ def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any:
             processed_chunks = []
             for idx in range(0, n, self.chunk_size):
                 processed_chunks.append(
-                    self.resnet(
-                        x[idx : min(idx + self.chunk_size, n)]
-                    ).float()
+                    self.resnet(x[idx : min(idx + self.chunk_size, n)]).float()
                 )
             x = torch.cat(processed_chunks, dim=0)
         else:
diff --git a/allenact_plugins/gym_plugin/gym_models.py b/allenact_plugins/gym_plugin/gym_models.py
index e79de8863..2010f48c0 100644
--- a/allenact_plugins/gym_plugin/gym_models.py
+++ b/allenact_plugins/gym_plugin/gym_models.py
@@ -43,7 +43,8 @@ def __init__(
 
         # critic
         self.critic = nn.Sequential(
-            *self.make_mlp_hidden(nn.Tanh, *mlp_hidden_dims), nn.Linear(32, 1),
+            *self.make_mlp_hidden(nn.Tanh, *mlp_hidden_dims),
+            nn.Linear(32, 1),
         )
 
         # maximum standard deviation
@@ -57,7 +58,9 @@ def __init__(
     def make_mlp_hidden(nl, *dims):
         res = []
         for it, dim in enumerate(dims[:-1]):
-            res.append(nn.Linear(dim, dims[it + 1]),)
+            res.append(
+                nn.Linear(dim, dims[it + 1]),
+            )
             res.append(nl())
         return res
 
diff --git a/allenact_plugins/gym_plugin/gym_tasks.py b/allenact_plugins/gym_plugin/gym_tasks.py
index 51c771e2d..b1e78cf66 100644
--- a/allenact_plugins/gym_plugin/gym_tasks.py
+++ b/allenact_plugins/gym_plugin/gym_tasks.py
@@ -220,7 +220,7 @@ def __init__(
             self.set_seed(seed)
         else:
             self.np_seeded_random_gen, _ = seeding.np_random(
-                random.randint(0, 2 ** 31 - 1)
+                random.randint(0, 2**31 - 1)
             )
 
         self.num_tasks_generated = 0
@@ -272,7 +272,7 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[GymTask]:
                 repeating = True
             else:
                 self._number_of_steps_taken_with_task_seed = 0
-                self._last_env_seed = self.np_seeded_random_gen.randint(0, 2 ** 31 - 1)
+                self._last_env_seed = self.np_seeded_random_gen.randint(0, 2**31 - 1)
 
         task_has_same_seed_reset = hasattr(self.env, "same_seed_reset")
 
@@ -286,7 +286,7 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[GymTask]:
 
         self.num_tasks_generated += 1
 
-        task_info = {"id": "random%d" % random.randint(0, 2 ** 63 - 1)}
+        task_info = {"id": "random%d" % random.randint(0, 2**63 - 1)}
 
         self._last_task = self.task_type(
             **dict(env=self.env, sensors=self.sensors, task_info=task_info),
diff --git a/allenact_plugins/habitat_plugin/habitat_constants.py b/allenact_plugins/habitat_plugin/habitat_constants.py
index 988718458..105f1a04c 100644
--- a/allenact_plugins/habitat_plugin/habitat_constants.py
+++ b/allenact_plugins/habitat_plugin/habitat_constants.py
@@ -4,7 +4,10 @@
     "HABITAT_BASE_DIR",
     default=os.path.join(os.getcwd(), "external_projects", "habitat-lab"),
 )
-HABITAT_DATA_BASE = os.path.join(os.getcwd(), "data",)
+HABITAT_DATA_BASE = os.path.join(
+    os.getcwd(),
+    "data",
+)
 
 if (not os.path.exists(HABITAT_BASE)) or (not os.path.exists(HABITAT_DATA_BASE)):
     raise ImportError(
diff --git a/allenact_plugins/habitat_plugin/habitat_environment.py b/allenact_plugins/habitat_plugin/habitat_environment.py
index 8ec09107f..882fa1e17 100644
--- a/allenact_plugins/habitat_plugin/habitat_environment.py
+++ b/allenact_plugins/habitat_plugin/habitat_environment.py
@@ -1,4 +1,5 @@
 """A wrapper for interacting with the Habitat environment."""
+
 import os
 from typing import Dict, Union, List, Optional
 
@@ -48,7 +49,9 @@ def get_rotation(self) -> Optional[List[float]]:
         return self.env.sim.get_agent_state().rotation
 
     def get_shortest_path(
-        self, source_state: AgentState, target_state: AgentState,
+        self,
+        source_state: AgentState,
+        target_state: AgentState,
     ) -> List[ShortestPathPoint]:
         return self.env.sim.action_space_shortest_path(source_state, [target_state])
 
diff --git a/allenact_plugins/habitat_plugin/habitat_preprocessors.py b/allenact_plugins/habitat_plugin/habitat_preprocessors.py
index 139597f9c..8b1378917 100644
--- a/allenact_plugins/habitat_plugin/habitat_preprocessors.py
+++ b/allenact_plugins/habitat_plugin/habitat_preprocessors.py
@@ -1,2 +1 @@
 
-
diff --git a/allenact_plugins/habitat_plugin/habitat_utils.py b/allenact_plugins/habitat_plugin/habitat_utils.py
index a002ec2b0..ffa32ff8a 100644
--- a/allenact_plugins/habitat_plugin/habitat_utils.py
+++ b/allenact_plugins/habitat_plugin/habitat_utils.py
@@ -10,7 +10,8 @@
 
 
 def construct_env_configs(
-    config: Config, allow_scene_repeat: bool = False,
+    config: Config,
+    allow_scene_repeat: bool = False,
 ) -> List[Config]:
     """Create list of Habitat Configs for training on multiple processes To
     allow better performance, dataset are split into small ones for each
@@ -62,9 +63,9 @@ def construct_env_configs(
         if len(config.SIMULATOR_GPU_IDS) == 0:
             task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = -1
         else:
-            task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = config.SIMULATOR_GPU_IDS[
-                i % len(config.SIMULATOR_GPU_IDS)
-            ]
+            task_config.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = (
+                config.SIMULATOR_GPU_IDS[i % len(config.SIMULATOR_GPU_IDS)]
+            )
 
         task_config.freeze()
 
diff --git a/allenact_plugins/ithor_plugin/ithor_environment.py b/allenact_plugins/ithor_plugin/ithor_environment.py
index beda87058..d9a7fd866 100644
--- a/allenact_plugins/ithor_plugin/ithor_environment.py
+++ b/allenact_plugins/ithor_plugin/ithor_environment.py
@@ -164,7 +164,10 @@ def last_action_return(self, value: Any) -> None:
         self.controller.last_event.metadata["actionReturn"] = value
 
     def start(
-        self, scene_name: Optional[str], move_mag: float = 0.25, **kwargs,
+        self,
+        scene_name: Optional[str],
+        move_mag: float = 0.25,
+        **kwargs,
     ) -> None:
         """Starts the ai2thor controller if it was previously stopped.
 
@@ -216,7 +219,10 @@ def stop(self) -> None:
             self._started = False
 
     def reset(
-        self, scene_name: Optional[str], move_mag: float = 0.25, **kwargs,
+        self,
+        scene_name: Optional[str],
+        move_mag: float = 0.25,
+        **kwargs,
     ):
         """Resets the ai2thor in a new scene.
 
@@ -295,9 +301,9 @@ def teleport_agent_to(
                     break
             if not reachable:
                 self.last_action = "TeleportFull"
-                self.last_event.metadata[
-                    "errorMessage"
-                ] = "Target position was not initially reachable."
+                self.last_event.metadata["errorMessage"] = (
+                    "Target position was not initially reachable."
+                )
                 self.last_action_success = False
                 return
         self.controller.step(
@@ -675,9 +681,9 @@ def step(
                     self.teleport_agent_to(**start_location, force_action=True)  # type: ignore
                     self.last_action = action
                     self.last_action_success = False
-                    self.last_event.metadata[
-                        "errorMessage"
-                    ] = "Moved to location outside of initially reachable points."
+                    self.last_event.metadata["errorMessage"] = (
+                        "Moved to location outside of initially reachable points."
+                    )
         elif "RandomizeHideSeekObjects" in action:
             last_position = self.get_agent_location()
             self.controller.step(action_dict)
diff --git a/allenact_plugins/ithor_plugin/ithor_sensors.py b/allenact_plugins/ithor_plugin/ithor_sensors.py
index 71fb7ffb9..9785b7053 100644
--- a/allenact_plugins/ithor_plugin/ithor_sensors.py
+++ b/allenact_plugins/ithor_plugin/ithor_sensors.py
@@ -41,7 +41,9 @@ class RGBSensorThor(RGBSensor[THOR_ENV_TYPE, THOR_TASK_TYPE]):
     """
 
     def frame_from_env(
-        self, env: THOR_ENV_TYPE, task: Optional[THOR_TASK_TYPE],
+        self,
+        env: THOR_ENV_TYPE,
+        task: Optional[THOR_TASK_TYPE],
     ) -> np.ndarray:  # type:ignore
         if isinstance(env, ai2thor.controller.Controller):
             return env.last_event.frame.copy()
@@ -248,7 +250,8 @@ def __init__(self, margin: float, uuid: str = "scene_bounds", **kwargs: Any):
 
     @staticmethod
     def get_bounds(
-        controller: ai2thor.controller.Controller, margin: float,
+        controller: ai2thor.controller.Controller,
+        margin: float,
     ) -> Dict[str, np.ndarray]:
         positions = controller.step("GetReachablePositions").metadata["actionReturn"]
         min_x = min(p["x"] for p in positions)
@@ -482,7 +485,10 @@ def __init__(
 
         def get_map_space(nchannels: int, size: int):
             return gym.spaces.Box(
-                low=0, high=1, shape=(size, size, nchannels), dtype=np.bool_,
+                low=0,
+                high=1,
+                shape=(size, size, nchannels),
+                dtype=np.bool_,
             )
 
         n = len(self.ordered_object_types)
@@ -490,12 +496,24 @@ def get_map_space(nchannels: int, size: int):
         big = self.semantic_map_builder.ground_truth_semantic_map.shape[0]
 
         space_dict = {
-            "egocentric_update": get_map_space(nchannels=n, size=small,),
-            "egocentric_mask": get_map_space(nchannels=1, size=small,),
+            "egocentric_update": get_map_space(
+                nchannels=n,
+                size=small,
+            ),
+            "egocentric_mask": get_map_space(
+                nchannels=1,
+                size=small,
+            ),
         }
         if not ego_only:
-            space_dict["explored_mask"] = get_map_space(nchannels=1, size=big,)
-            space_dict["map"] = get_map_space(nchannels=n, size=big,)
+            space_dict["explored_mask"] = get_map_space(
+                nchannels=1,
+                size=big,
+            )
+            space_dict["map"] = get_map_space(
+                nchannels=n,
+                size=big,
+            )
 
         observation_space = gym.spaces.Dict(space_dict)
         super().__init__(**prepare_locals_for_super(locals()))
diff --git a/allenact_plugins/ithor_plugin/ithor_task_samplers.py b/allenact_plugins/ithor_plugin/ithor_task_samplers.py
index e43b699af..aee008c59 100644
--- a/allenact_plugins/ithor_plugin/ithor_task_samplers.py
+++ b/allenact_plugins/ithor_plugin/ithor_task_samplers.py
@@ -39,9 +39,9 @@ def __init__(
         self.scene_counter: Optional[int] = None
         self.scene_order: Optional[List[str]] = None
         self.scene_id: Optional[int] = None
-        self.scene_period: Optional[
-            Union[str, int]
-        ] = scene_period  # default makes a random choice
+        self.scene_period: Optional[Union[str, int]] = (
+            scene_period  # default makes a random choice
+        )
         self.max_tasks: Optional[int] = None
         self.reset_tasks = max_tasks
 
@@ -174,9 +174,9 @@ def next_task(
             )
 
         task_info["start_pose"] = copy.copy(pose)
-        task_info[
-            "id"
-        ] = f"{scene}__{'_'.join(list(map(str, self.env.get_key(pose))))}__{task_info['object_type']}"
+        task_info["id"] = (
+            f"{scene}__{'_'.join(list(map(str, self.env.get_key(pose))))}__{task_info['object_type']}"
+        )
 
         self._last_sampled_task = ObjectNaviThorGridTask(
             env=self.env,
diff --git a/allenact_plugins/ithor_plugin/ithor_tasks.py b/allenact_plugins/ithor_plugin/ithor_tasks.py
index 624dee375..6e63221b8 100644
--- a/allenact_plugins/ithor_plugin/ithor_tasks.py
+++ b/allenact_plugins/ithor_plugin/ithor_tasks.py
@@ -214,13 +214,13 @@ def query_expert(self, **kwargs) -> Tuple[int, bool]:
                             if standing == 1
                         )
 
-                    self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE[
-                        key
-                    ] = locations_from_which_object_is_visible
+                    self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE[key] = (
+                        locations_from_which_object_is_visible
+                    )
 
-                self._subsampled_locations_from_which_obj_visible = self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE[
-                    key
-                ]
+                self._subsampled_locations_from_which_obj_visible = (
+                    self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE[key]
+                )
                 if len(self._subsampled_locations_from_which_obj_visible) > 5:
                     self._subsampled_locations_from_which_obj_visible = random.sample(
                         self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE[key], 5
diff --git a/allenact_plugins/ithor_plugin/ithor_util.py b/allenact_plugins/ithor_plugin/ithor_util.py
index 0446bc75d..56c9110f2 100644
--- a/allenact_plugins/ithor_plugin/ithor_util.py
+++ b/allenact_plugins/ithor_plugin/ithor_util.py
@@ -43,7 +43,9 @@ def horizontal_to_vertical_fov(
     horizontal_fov_in_degrees: float, height: float, width: float
 ):
     return vertical_to_horizontal_fov(
-        vertical_fov_in_degrees=horizontal_fov_in_degrees, height=width, width=height,
+        vertical_fov_in_degrees=horizontal_fov_in_degrees,
+        height=width,
+        width=height,
     )
 
 
diff --git a/allenact_plugins/lighthouse_plugin/lighthouse_environment.py b/allenact_plugins/lighthouse_plugin/lighthouse_environment.py
index d958f522c..a89721ff3 100644
--- a/allenact_plugins/lighthouse_plugin/lighthouse_environment.py
+++ b/allenact_plugins/lighthouse_plugin/lighthouse_environment.py
@@ -76,7 +76,7 @@ def __init__(self, world_dim: int, world_radius: int, **kwargs):
         )
         self.current_position = np.zeros(world_dim, dtype=int)
         self.closest_distance_to_corners = np.full(
-            2 ** world_dim, fill_value=world_radius, dtype=int
+            2**world_dim, fill_value=world_radius, dtype=int
         )
         self.positions: List[Tuple[int, ...]] = [tuple(self.current_position)]
         self.goal_position: Optional[np.ndarray] = None
@@ -84,7 +84,7 @@ def __init__(self, world_dim: int, world_radius: int, **kwargs):
 
         self.seed: Optional[int] = None
         self.np_seeded_random_gen: Optional[np.random.RandomState] = None
-        self.set_seed(seed=int(kwargs.get("seed", np.random.randint(0, 2 ** 31 - 1))))
+        self.set_seed(seed=int(kwargs.get("seed", np.random.randint(0, 2**31 - 1))))
 
         self.random_reset()
 
diff --git a/allenact_plugins/lighthouse_plugin/lighthouse_sensors.py b/allenact_plugins/lighthouse_plugin/lighthouse_sensors.py
index 0ac071aad..cbbb34c7f 100644
--- a/allenact_plugins/lighthouse_plugin/lighthouse_sensors.py
+++ b/allenact_plugins/lighthouse_plugin/lighthouse_sensors.py
@@ -66,7 +66,10 @@ def get_corner_observation(
             [on_border_value, last_action],
         ),
         axis=0,
-        out=np.zeros((seen_corner_values.shape[0] + 2,), dtype=np.float32,),
+        out=np.zeros(
+            (seen_corner_values.shape[0] + 2,),
+            dtype=np.float32,
+        ),
     )
 
 
@@ -90,7 +93,7 @@ def _get_observation_space(self):
         return gym.spaces.Box(
             low=min(LightHouseEnvironment.SPACE_LEVELS),
             high=max(LightHouseEnvironment.SPACE_LEVELS),
-            shape=(2 ** self.world_dim + 2,),
+            shape=(2**self.world_dim + 2,),
             dtype=int,
         )
 
@@ -192,7 +195,7 @@ def view_tuple_to_design_array(self, view_tuple: Tuple):
 
     @classmethod
     def output_dim(cls, world_dim: int):
-        return ((3 if world_dim == 1 else 4) ** (2 ** world_dim)) * (
+        return ((3 if world_dim == 1 else 4) ** (2**world_dim)) * (
             2 * world_dim + 1
         ) ** 2
 
@@ -239,7 +242,7 @@ def _get_variables_and_levels(world_dim: int):
         return (
             [
                 ("s{}".format(i), list(range(3 if world_dim == 1 else 4)))
-                for i in range(2 ** world_dim)
+                for i in range(2**world_dim)
             ]
             + [("b{}".format(i), list(range(2 * world_dim + 1))) for i in range(1)]
             + [("a{}".format(i), list(range(2 * world_dim + 1))) for i in range(1)]
diff --git a/allenact_plugins/lighthouse_plugin/lighthouse_tasks.py b/allenact_plugins/lighthouse_plugin/lighthouse_tasks.py
index c5634d98b..c3d3089b5 100644
--- a/allenact_plugins/lighthouse_plugin/lighthouse_tasks.py
+++ b/allenact_plugins/lighthouse_plugin/lighthouse_tasks.py
@@ -161,7 +161,9 @@ def query_expert(
         **kwargs,
     ) -> Tuple[Any, bool]:
         view_tuple = get_corner_observation(
-            env=self.env, view_radius=expert_view_radius, view_corner_offsets=None,
+            env=self.env,
+            view_radius=expert_view_radius,
+            view_corner_offsets=None,
         )
 
         goal = self.env.GOAL
@@ -359,7 +361,7 @@ def __init__(
             )
 
         self.seed: int = int(
-            seed if seed is not None else np.random.randint(0, 2 ** 31 - 1)
+            seed if seed is not None else np.random.randint(0, 2**31 - 1)
         )
         self.np_seeded_random_gen: Optional[np.random.RandomState] = None
         self.set_seed(self.seed)
@@ -382,7 +384,7 @@ def length(self) -> Union[int, float]:
 
     @property
     def total_unique(self) -> Optional[Union[int, float]]:
-        n = 2 ** self.world_dim
+        n = 2**self.world_dim
         return n if self.num_unique_seeds is None else min(n, self.num_unique_seeds)
 
     @property
@@ -401,7 +403,7 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[Task]:
             else:
                 seed = self.np_seeded_random_gen.choice(self.task_seeds_list)
         else:
-            seed = self.np_seeded_random_gen.randint(0, 2 ** 31 - 1)
+            seed = self.np_seeded_random_gen.randint(0, 2**31 - 1)
 
         self.num_tasks_generated += 1
 
diff --git a/allenact_plugins/lighthouse_plugin/lighthouse_util.py b/allenact_plugins/lighthouse_plugin/lighthouse_util.py
index 4a8b76e8b..baaaa5700 100644
--- a/allenact_plugins/lighthouse_plugin/lighthouse_util.py
+++ b/allenact_plugins/lighthouse_plugin/lighthouse_util.py
@@ -13,7 +13,10 @@ def __init__(self, optimal: float, deviation: float, min_memory_size: int = 100)
         self.memory: np.ndarray = np.zeros(min_memory_size)
 
     def __call__(
-        self, stage_steps: int, total_steps: int, training_metrics: ScalarMeanTracker,
+        self,
+        stage_steps: int,
+        total_steps: int,
+        training_metrics: ScalarMeanTracker,
     ) -> bool:
         sums = training_metrics.sums()
         counts = training_metrics.counts()
@@ -36,9 +39,9 @@ def __call__(
                 self.current_pos = 0
                 self.has_filled = True
             else:
-                self.memory[
-                    self.current_pos : (self.current_pos + count)
-                ] = ep_length_ave
+                self.memory[self.current_pos : (self.current_pos + count)] = (
+                    ep_length_ave
+                )
 
                 if self.current_pos + count > n:
                     self.has_filled = True
diff --git a/allenact_plugins/manipulathor_plugin/arm_calculation_utils.py b/allenact_plugins/manipulathor_plugin/arm_calculation_utils.py
index 827a2d844..2a048f4b9 100644
--- a/allenact_plugins/manipulathor_plugin/arm_calculation_utils.py
+++ b/allenact_plugins/manipulathor_plugin/arm_calculation_utils.py
@@ -1,5 +1,6 @@
 """Utility classes and functions for calculating the arm relative and absolute
 position."""
+
 from typing import Dict
 
 import numpy as np
diff --git a/allenact_plugins/manipulathor_plugin/manipulathor_constants.py b/allenact_plugins/manipulathor_plugin/manipulathor_constants.py
index 5832ae7a0..8f2927708 100644
--- a/allenact_plugins/manipulathor_plugin/manipulathor_constants.py
+++ b/allenact_plugins/manipulathor_plugin/manipulathor_constants.py
@@ -1,4 +1,5 @@
 """Constant values and hyperparameters that are used by the environment."""
+
 import ai2thor.fifo_server
 
 
diff --git a/allenact_plugins/manipulathor_plugin/manipulathor_environment.py b/allenact_plugins/manipulathor_plugin/manipulathor_environment.py
index 00988c81b..5caed01ae 100644
--- a/allenact_plugins/manipulathor_plugin/manipulathor_environment.py
+++ b/allenact_plugins/manipulathor_plugin/manipulathor_environment.py
@@ -140,7 +140,10 @@ def create_controller(self):
         return controller
 
     def start(
-        self, scene_name: Optional[str], move_mag: float = 0.25, **kwargs,
+        self,
+        scene_name: Optional[str],
+        move_mag: float = 0.25,
+        **kwargs,
     ) -> None:
         """Starts the ai2thor controller if it was previously stopped.
 
@@ -163,7 +166,10 @@ def start(
         self.reset(scene_name=scene_name, move_mag=move_mag, **kwargs)
 
     def reset(
-        self, scene_name: Optional[str], move_mag: float = 0.25, **kwargs,
+        self,
+        scene_name: Optional[str],
+        move_mag: float = 0.25,
+        **kwargs,
     ):
         self._move_mag = move_mag
         self._grid_size = self._move_mag
@@ -225,7 +231,7 @@ def object_in_hand(self):
     @classmethod
     def correct_nan_inf(cls, flawed_dict, extra_tag=""):
         corrected_dict = copy.deepcopy(flawed_dict)
-        for (k, v) in corrected_dict.items():
+        for k, v in corrected_dict.items():
             if math.isnan(v) or math.isinf(v):
                 corrected_dict[k] = 0
         return corrected_dict
@@ -275,7 +281,9 @@ def get_current_object_locations(self):
         metadata = self.controller.last_event.metadata["objects"]
         for o in metadata:
             obj_loc_dict[o["objectId"]] = dict(
-                position=o["position"], rotation=o["rotation"], visible=o["visible"],
+                position=o["position"],
+                rotation=o["rotation"],
+                visible=o["visible"],
             )
         return copy.deepcopy(obj_loc_dict)
 
diff --git a/allenact_plugins/manipulathor_plugin/manipulathor_sensors.py b/allenact_plugins/manipulathor_plugin/manipulathor_sensors.py
index b0baf524c..d6968770a 100644
--- a/allenact_plugins/manipulathor_plugin/manipulathor_sensors.py
+++ b/allenact_plugins/manipulathor_plugin/manipulathor_sensors.py
@@ -1,4 +1,5 @@
 """Utility classes and functions for sensory inputs used by the models."""
+
 from typing import Any, Union, Optional
 
 import gym
@@ -20,7 +21,10 @@
 
 
 class DepthSensorThor(
-    DepthSensor[Union[ManipulaTHOREnvironment], Union[Task[ManipulaTHOREnvironment]],]
+    DepthSensor[
+        Union[ManipulaTHOREnvironment],
+        Union[Task[ManipulaTHOREnvironment]],
+    ]
 ):
     """Sensor for Depth images in THOR.
 
@@ -35,7 +39,10 @@ def frame_from_env(
 
 
 class NoVisionSensorThor(
-    RGBSensor[Union[ManipulaTHOREnvironment], Union[Task[ManipulaTHOREnvironment]],]
+    RGBSensor[
+        Union[ManipulaTHOREnvironment],
+        Union[Task[ManipulaTHOREnvironment]],
+    ]
 ):
     """Sensor for RGB images in THOR.
 
@@ -108,7 +115,9 @@ def get_observation(
         relative_current_obj = world_coords_to_agent_coords(object_info, agent_state)
         relative_goal_state = world_coords_to_agent_coords(target_state, agent_state)
         relative_distance = diff_position(
-            relative_current_obj, relative_goal_state, absolute=False,
+            relative_current_obj,
+            relative_goal_state,
+            absolute=False,
         )
 
         result = coord_system_transform(relative_distance, self.coord_system)
@@ -161,7 +170,7 @@ def get_observation(
         relative_distance = diff_position(relative_current_obj, relative_goal_state)
         result = state_dict_to_tensor(dict(position=relative_distance))
 
-        result = ((result ** 2).sum() ** 0.5).view(1)
+        result = ((result**2).sum() ** 0.5).view(1)
         return result
 
 
@@ -202,7 +211,9 @@ def get_observation(
             hand_state, env.controller.last_event.metadata["agent"]
         )
         relative_distance = diff_position(
-            relative_goal_obj, relative_hand_state, absolute=False,
+            relative_goal_obj,
+            relative_hand_state,
+            absolute=False,
         )
         result = coord_system_transform(relative_distance, self.coord_system)
         return result
@@ -257,7 +268,7 @@ def get_observation(
         relative_distance = diff_position(relative_goal_obj, relative_hand_state)
         result = state_dict_to_tensor(dict(position=relative_distance))
 
-        result = ((result ** 2).sum() ** 0.5).view(1)
+        result = ((result**2).sum() ** 0.5).view(1)
         return result
 
 
diff --git a/allenact_plugins/manipulathor_plugin/manipulathor_task_samplers.py b/allenact_plugins/manipulathor_plugin/manipulathor_task_samplers.py
index 7159f88d2..75812e456 100644
--- a/allenact_plugins/manipulathor_plugin/manipulathor_task_samplers.py
+++ b/allenact_plugins/manipulathor_plugin/manipulathor_task_samplers.py
@@ -1,4 +1,5 @@
 """Task Samplers for the task of ArmPointNav."""
+
 import json
 import random
 from typing import List, Dict, Optional, Any, Union
@@ -66,9 +67,9 @@ def __init__(
         self.scene_counter: Optional[int] = None
         self.scene_order: Optional[List[str]] = None
         self.scene_id: Optional[int] = None
-        self.scene_period: Optional[
-            Union[str, int]
-        ] = scene_period  # default makes a random choice
+        self.scene_period: Optional[Union[str, int]] = (
+            scene_period  # default makes a random choice
+        )
         self.max_tasks: Optional[int] = None
         self.reset_tasks = max_tasks
 
@@ -87,7 +88,9 @@ def __init__(
 
     def _create_environment(self, **kwargs) -> ManipulaTHOREnvironment:
         env = ManipulaTHOREnvironment(
-            make_agents_visible=False, object_open_speed=0.05, env_args=self.env_args,
+            make_agents_visible=False,
+            object_open_speed=0.05,
+            env_args=self.env_args,
         )
 
         return env
diff --git a/allenact_plugins/manipulathor_plugin/manipulathor_tasks.py b/allenact_plugins/manipulathor_plugin/manipulathor_tasks.py
index c72ea97c3..0b61dc0f4 100644
--- a/allenact_plugins/manipulathor_plugin/manipulathor_tasks.py
+++ b/allenact_plugins/manipulathor_plugin/manipulathor_tasks.py
@@ -164,7 +164,7 @@ def calc_action_stat_metrics(self) -> Dict[str, Any]:
         action_success_stat["action_success/total"] = 0.0
 
         seq_len = len(self.action_sequence_and_success)
-        for (action_name, action_success) in self.action_sequence_and_success:
+        for action_name, action_success in self.action_sequence_and_success:
             action_stat["action_stat/" + action_name] += 1.0
             action_success_stat[
                 "action_success/{}".format(action_name)
@@ -191,9 +191,9 @@ def metrics(self) -> Dict[str, Any]:
 
             # 1. goal object metrics
             final_obj_distance_from_goal = self.obj_distance_from_goal()
-            result[
-                "average/final_obj_distance_from_goal"
-            ] = final_obj_distance_from_goal
+            result["average/final_obj_distance_from_goal"] = (
+                final_obj_distance_from_goal
+            )
             final_arm_distance_from_obj = self.arm_distance_from_obj()
             result["average/final_arm_distance_from_obj"] = final_arm_distance_from_obj
 
@@ -309,12 +309,12 @@ def metrics(self) -> Dict[str, Any]:
 
         if self.is_done():
             # add disturbance distance metrics
-            result[
-                "disturbance/objects_moved_distance"
-            ] = self.cumulated_disturb_distance_all
-            result[
-                "disturbance/objects_moved_distance_vis"
-            ] = self.cumulated_disturb_distance_visible
+            result["disturbance/objects_moved_distance"] = (
+                self.cumulated_disturb_distance_all
+            )
+            result["disturbance/objects_moved_distance_vis"] = (
+                self.cumulated_disturb_distance_visible
+            )
 
         return result
 
diff --git a/allenact_plugins/manipulathor_plugin/manipulathor_viz.py b/allenact_plugins/manipulathor_plugin/manipulathor_viz.py
index 27a949b15..a9369366d 100644
--- a/allenact_plugins/manipulathor_plugin/manipulathor_viz.py
+++ b/allenact_plugins/manipulathor_plugin/manipulathor_viz.py
@@ -1,4 +1,5 @@
 """Utility functions and classes for visualization and logging."""
+
 import os
 from datetime import datetime
 
@@ -23,7 +24,10 @@ def __init__(self, exp_name="", log_dir=""):
         if exp_name == "":
             exp_name = "NoNameExp"
         self.exp_name = exp_name
-        log_dir = os.path.join(exp_name, log_dir,)
+        log_dir = os.path.join(
+            exp_name,
+            log_dir,
+        )
         self.log_dir = log_dir
         os.makedirs(self.log_dir, exist_ok=True)
         self.log_queue = []
@@ -53,7 +57,7 @@ def __init__(self, exp_name="", log_dir="", **kwargs):
 
     def average_dict(self):
         result = {}
-        for (k, v) in self.total_metric_dict.items():
+        for k, v in self.total_metric_dict.items():
             result[k] = sum(v) / len(v)
         return result
 
diff --git a/allenact_plugins/minigrid_plugin/minigrid_offpolicy.py b/allenact_plugins/minigrid_plugin/minigrid_offpolicy.py
index 4c641de3a..b292faa88 100644
--- a/allenact_plugins/minigrid_plugin/minigrid_offpolicy.py
+++ b/allenact_plugins/minigrid_plugin/minigrid_offpolicy.py
@@ -325,6 +325,9 @@ def next_batch(self) -> Dict[str, torch.Tensor]:
 
         self._total_experiences += self.num_samplers * self.rollout_len
         return {
-            key: torch.stack(all_data[key], dim=1,)  # new sampler dim
+            key: torch.stack(
+                all_data[key],
+                dim=1,
+            )  # new sampler dim
             for key in all_data
         }
diff --git a/allenact_plugins/minigrid_plugin/minigrid_sensors.py b/allenact_plugins/minigrid_plugin/minigrid_sensors.py
index 796e156b5..d7195d8d3 100644
--- a/allenact_plugins/minigrid_plugin/minigrid_sensors.py
+++ b/allenact_plugins/minigrid_plugin/minigrid_sensors.py
@@ -126,7 +126,9 @@ def get_observation(
             out = out[: self.instr_len]
         elif n < self.instr_len:
             out = torch.nn.functional.pad(
-                input=out, pad=[0, self.instr_len - n], value=0,
+                input=out,
+                pad=[0, self.instr_len - n],
+                value=0,
             )
 
         return out.long().numpy()
diff --git a/allenact_plugins/minigrid_plugin/minigrid_tasks.py b/allenact_plugins/minigrid_plugin/minigrid_tasks.py
index 88ce30483..6811d29d8 100644
--- a/allenact_plugins/minigrid_plugin/minigrid_tasks.py
+++ b/allenact_plugins/minigrid_plugin/minigrid_tasks.py
@@ -29,7 +29,14 @@ class MiniGridTask(Task[CrossingEnv]):
     )
     _CACHED_GRAPHS: Dict[str, nx.DiGraph] = {}
     _NEIGHBOR_OFFSETS = tuple(
-        [(-1, 0, 0), (0, -1, 0), (0, 0, -1), (1, 0, 0), (0, 1, 0), (0, 0, 1),]
+        [
+            (-1, 0, 0),
+            (0, -1, 0),
+            (0, 0, -1),
+            (1, 0, 0),
+            (0, 1, 0),
+            (0, 0, 1),
+        ]
     )
 
     _XY_DIFF_TO_AGENT_DIR = {
@@ -156,7 +163,10 @@ def possible_neighbor_offsets(cls) -> Tuple[Tuple[int, int, int], ...]:
 
     @classmethod
     def _add_from_to_edge(
-        cls, g: nx.DiGraph, s: Tuple[int, int, int], t: Tuple[int, int, int],
+        cls,
+        g: nx.DiGraph,
+        s: Tuple[int, int, int],
+        t: Tuple[int, int, int],
     ):
         """Adds nodes and corresponding edges to existing nodes.
         This approach avoids adding the same edge multiple times.
@@ -237,7 +247,9 @@ def _add_node_to_graph(
                     self._add_from_to_edge(graph, s, t)
                     self._add_from_to_edge(graph, t, s)
 
-    def generate_graph(self,) -> nx.DiGraph:
+    def generate_graph(
+        self,
+    ) -> nx.DiGraph:
         """The generated graph is based on the fully observable grid (as the
         expert sees it all).
 
@@ -449,7 +461,7 @@ def __init__(
         self.env = env_class(**env_info)
         self.task_class = task_class
 
-        self.np_seeded_random_gen, _ = seeding.np_random(random.randint(0, 2 ** 31 - 1))
+        self.np_seeded_random_gen, _ = seeding.np_random(random.randint(0, 2**31 - 1))
 
         self.num_tasks_generated = 0
 
@@ -499,7 +511,7 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[MiniGridTask]
                 repeating = True
             else:
                 self._number_of_steps_taken_with_task_seed = 0
-                self._last_env_seed = self.np_seeded_random_gen.randint(0, 2 ** 31 - 1)
+                self._last_env_seed = self.np_seeded_random_gen.randint(0, 2**31 - 1)
 
         task_has_same_seed_reset = hasattr(self.env, "same_seed_reset")
 
diff --git a/allenact_plugins/navigation_plugin/objectnav/models.py b/allenact_plugins/navigation_plugin/objectnav/models.py
index eb0d9ab0e..cdaf477b5 100644
--- a/allenact_plugins/navigation_plugin/objectnav/models.py
+++ b/allenact_plugins/navigation_plugin/objectnav/models.py
@@ -3,6 +3,7 @@
 Object navigation is currently available as a Task in AI2-THOR and
 Facebook's Habitat.
 """
+
 from typing import Optional, List, Dict, cast, Tuple, Sequence
 
 import gym
@@ -122,7 +123,10 @@ def __init__(
             good_uuids = [
                 uuid for uuid in [self.rgb_uuid, self.depth_uuid] if uuid is not None
             ]
-            cat_model = CatObservations(ordered_uuids=good_uuids, dim=-1,)
+            cat_model = CatObservations(
+                ordered_uuids=good_uuids,
+                dim=-1,
+            )
             after_cat_size = sum(
                 observation_space[uuid].shape[-1] for uuid in good_uuids
             )
@@ -308,7 +312,8 @@ def __init__(
         self.goal_space = observation_spaces.spaces[self.goal_uuid]
         if isinstance(self.goal_space, gym.spaces.Discrete):
             self.embed_goal = nn.Embedding(
-                num_embeddings=self.goal_space.n, embedding_dim=self.goal_embed_dims,
+                num_embeddings=self.goal_space.n,
+                embedding_dim=self.goal_embed_dims,
             )
         elif isinstance(self.goal_space, gym.spaces.Box):
             self.embed_goal = nn.Linear(self.goal_space.shape[-1], self.goal_embed_dims)
@@ -403,7 +408,12 @@ def forward(self, observations):
             self.compress_resnet(observations),
             self.distribute_target(observations),
         ]
-        x = self.target_obs_combiner(torch.cat(embs, dim=1,))
+        x = self.target_obs_combiner(
+            torch.cat(
+                embs,
+                dim=1,
+            )
+        )
         x = x.reshape(x.size(0), -1)  # flatten
 
         return self.adapt_output(x, use_agent, nstep, nsampler, nagent)
@@ -431,7 +441,8 @@ def __init__(
         self.goal_space = observation_spaces.spaces[self.goal_uuid]
         if isinstance(self.goal_space, gym.spaces.Discrete):
             self.embed_goal = nn.Embedding(
-                num_embeddings=self.goal_space.n, embedding_dim=self.goal_embed_dims,
+                num_embeddings=self.goal_space.n,
+                embedding_dim=self.goal_embed_dims,
             )
         elif isinstance(self.goal_space, gym.spaces.Box):
             self.embed_goal = nn.Linear(self.goal_space.shape[-1], self.goal_embed_dims)
@@ -550,12 +561,22 @@ def forward(self, observations):
             self.compress_rgb_resnet(observations),
             self.distribute_target(observations),
         ]
-        rgb_x = self.rgb_target_obs_combiner(torch.cat(rgb_embs, dim=1,))
+        rgb_x = self.rgb_target_obs_combiner(
+            torch.cat(
+                rgb_embs,
+                dim=1,
+            )
+        )
         depth_embs = [
             self.compress_depth_resnet(observations),
             self.distribute_target(observations),
         ]
-        depth_x = self.depth_target_obs_combiner(torch.cat(depth_embs, dim=1,))
+        depth_x = self.depth_target_obs_combiner(
+            torch.cat(
+                depth_embs,
+                dim=1,
+            )
+        )
         x = torch.cat([rgb_x, depth_x], dim=1)
         x = x.reshape(x.shape[0], -1)  # flatten
 
diff --git a/allenact_plugins/navigation_plugin/pointnav/models.py b/allenact_plugins/navigation_plugin/pointnav/models.py
index 658c11496..b534c54b4 100644
--- a/allenact_plugins/navigation_plugin/pointnav/models.py
+++ b/allenact_plugins/navigation_plugin/pointnav/models.py
@@ -3,6 +3,7 @@
 Object navigation is currently available as a Task in AI2-THOR and
 Facebook's Habitat.
 """
+
 from typing import Optional, List, Union, Sequence
 
 import gym
diff --git a/allenact_plugins/robothor_plugin/robothor_environment.py b/allenact_plugins/robothor_plugin/robothor_environment.py
index 7318702e3..8ab7fe541 100644
--- a/allenact_plugins/robothor_plugin/robothor_environment.py
+++ b/allenact_plugins/robothor_plugin/robothor_environment.py
@@ -53,7 +53,9 @@ def __init__(self, all_metadata_available: bool = True, **kwargs):
             )
 
         recursive_update(self.config, kwargs)
-        self.controller = Controller(**self.config,)
+        self.controller = Controller(
+            **self.config,
+        )
 
         self.all_metadata_available = all_metadata_available
 
@@ -70,9 +72,9 @@ def __init__(self, all_metadata_available: bool = True, **kwargs):
 
         self.agent_count = self.config["agentCount"]
 
-        self._extra_teleport_kwargs: Dict[
-            str, Any
-        ] = {}  # Used for backwards compatability with the teleport action
+        self._extra_teleport_kwargs: Dict[str, Any] = (
+            {}
+        )  # Used for backwards compatability with the teleport action
 
     def initialize_grid_dimensions(
         self, reachable_points: Collection[Dict[str, float]]
diff --git a/allenact_plugins/robothor_plugin/robothor_models.py b/allenact_plugins/robothor_plugin/robothor_models.py
index b3b16e4c4..3b6bd24e8 100644
--- a/allenact_plugins/robothor_plugin/robothor_models.py
+++ b/allenact_plugins/robothor_plugin/robothor_models.py
@@ -123,6 +123,10 @@ def forward(  # type:ignore
         dists, vals = self.actor_critic(x)
 
         return (
-            ActorCriticOutput(distributions=dists, values=vals, extras={},),
+            ActorCriticOutput(
+                distributions=dists,
+                values=vals,
+                extras={},
+            ),
             memory.set_tensor("rnn", rnn_hidden_states),
         )
diff --git a/allenact_plugins/robothor_plugin/robothor_sensors.py b/allenact_plugins/robothor_plugin/robothor_sensors.py
index d59c83aba..8564143be 100644
--- a/allenact_plugins/robothor_plugin/robothor_sensors.py
+++ b/allenact_plugins/robothor_plugin/robothor_sensors.py
@@ -113,7 +113,7 @@ def quaternion_from_coeff(coeffs: np.ndarray) -> np.quaternion:
 
     @staticmethod
     def cartesian_to_polar(x, y):
-        rho = np.sqrt(x ** 2 + y ** 2)
+        rho = np.sqrt(x**2 + y**2)
         phi = np.arctan2(y, x)
         return rho, phi
 
@@ -151,7 +151,12 @@ def get_observation(
         )
 
 
-class DepthSensorThor(DepthSensor[THOR_ENV_TYPE, THOR_TASK_TYPE,],):
+class DepthSensorThor(
+    DepthSensor[
+        THOR_ENV_TYPE,
+        THOR_TASK_TYPE,
+    ],
+):
     def __init__(
         self,
         use_resnet_normalization: Optional[bool] = None,
diff --git a/allenact_plugins/robothor_plugin/robothor_task_samplers.py b/allenact_plugins/robothor_plugin/robothor_task_samplers.py
index 853a6ffc4..2deac2d49 100644
--- a/allenact_plugins/robothor_plugin/robothor_task_samplers.py
+++ b/allenact_plugins/robothor_plugin/robothor_task_samplers.py
@@ -59,9 +59,9 @@ def __init__(
             self.scene_counter: Optional[int] = None
             self.scene_order: Optional[List[str]] = None
             self.scene_id: Optional[int] = None
-            self.scene_period: Optional[
-                Union[str, int]
-            ] = scene_period  # default makes a random choice
+            self.scene_period: Optional[Union[str, int]] = (
+                scene_period  # default makes a random choice
+            )
             self.max_tasks: Optional[int] = None
             self.reset_tasks = max_tasks
         else:
@@ -564,9 +564,9 @@ def __init__(
         self.scene_counter: Optional[int] = None
         self.scene_order: Optional[List[str]] = None
         self.scene_id: Optional[int] = None
-        self.scene_period: Optional[
-            Union[str, int]
-        ] = scene_period  # default makes a random choice
+        self.scene_period: Optional[Union[str, int]] = (
+            scene_period  # default makes a random choice
+        )
         self.max_tasks: Optional[int] = None
         self.reset_tasks = max_tasks
 
@@ -962,9 +962,9 @@ def __init__(
         self.scene_counter: Optional[int] = None
         self.scene_order: Optional[List[str]] = None
         self.scene_id: Optional[int] = None
-        self.scene_period: Optional[
-            Union[str, int]
-        ] = scene_period  # default makes a random choice
+        self.scene_period: Optional[Union[str, int]] = (
+            scene_period  # default makes a random choice
+        )
         self.max_tasks: Optional[int] = None
         self.reset_tasks = max_tasks
 
@@ -1109,7 +1109,7 @@ def next_task(
                 # + ["%4.2f" % pose1["rotation"]["y"]]
                 # + ["%4.2f" % pose2[k] for k in ["x", "y", "z"]]
                 # + ["%4.2f" % pose2["rotation"]["y"]]
-                + ["%d" % random.randint(0, 2 ** 63 - 1)]
+                + ["%d" % random.randint(0, 2**63 - 1)]
             ),
         }
 
diff --git a/allenact_plugins/robothor_plugin/robothor_tasks.py b/allenact_plugins/robothor_plugin/robothor_tasks.py
index 8b16ddec3..76feeb0b5 100644
--- a/allenact_plugins/robothor_plugin/robothor_tasks.py
+++ b/allenact_plugins/robothor_plugin/robothor_tasks.py
@@ -64,9 +64,9 @@ def __init__(
         self._rewards: List[float] = []
         self._distance_to_goal: List[float] = []
         self._metrics = None
-        self.path: List[
-            Any
-        ] = []  # the initial coordinate will be directly taken from the optimal path
+        self.path: List[Any] = (
+            []
+        )  # the initial coordinate will be directly taken from the optimal path
         self.travelled_distance = 0.0
 
         self.task_info["followed_path"] = [self.env.agent_state()]
@@ -349,7 +349,10 @@ def shaping(self) -> float:
         self.closest_geo_distance = min(self.closest_geo_distance, geodesic_distance)
 
         return (
-            max(min(rew, max_reward_mag), -max_reward_mag,)
+            max(
+                min(rew, max_reward_mag),
+                -max_reward_mag,
+            )
             * self.reward_configs["shaping_weight"]
         )
 
diff --git a/projects/babyai_baselines/experiments/base.py b/projects/babyai_baselines/experiments/base.py
index 1c54f74e9..d40614dfb 100644
--- a/projects/babyai_baselines/experiments/base.py
+++ b/projects/babyai_baselines/experiments/base.py
@@ -87,7 +87,9 @@ def rl_loss_default(cls, alg: str, steps: Optional[int] = None):
             assert steps is not None
             return {
                 "loss": Builder(
-                    PPO, kwargs={"clip_decay": LinearDecay(steps)}, default=PPOConfig,
+                    PPO,
+                    kwargs={"clip_decay": LinearDecay(steps)},
+                    default=PPOConfig,
                 ),
                 "num_mini_batch": cls.PPO_NUM_MINI_BATCH,
                 "update_repeats": 4,
@@ -226,13 +228,13 @@ def test_task_sampler_args(
             process_ind < (self.NUM_TEST_TASKS % total_processes)
         )
         task_seeds_list = [
-            2 ** 31 - 1 + self.TEST_SEED_OFFSET + process_ind + total_processes * i
+            2**31 - 1 + self.TEST_SEED_OFFSET + process_ind + total_processes * i
             for i in range(max_tasks)
         ]
         # print(max_tasks, process_ind, total_processes, task_seeds_list)
 
         assert len(task_seeds_list) == 0 or (
-            min(task_seeds_list) >= 0 and max(task_seeds_list) <= 2 ** 32 - 1
+            min(task_seeds_list) >= 0 and max(task_seeds_list) <= 2**32 - 1
         )
 
         train_sampler_args = self.train_task_sampler_args(
diff --git a/projects/babyai_baselines/experiments/go_to_local/a2c.py b/projects/babyai_baselines/experiments/go_to_local/a2c.py
index 1ec5c22fa..edd49bd54 100644
--- a/projects/babyai_baselines/experiments/go_to_local/a2c.py
+++ b/projects/babyai_baselines/experiments/go_to_local/a2c.py
@@ -28,10 +28,13 @@ def training_pipeline(cls, **kwargs):
         a2c_info = cls.rl_loss_default("a2c", steps=total_training_steps)
 
         return cls._training_pipeline(
-            named_losses={"a2c_loss": a2c_info["loss"],},
+            named_losses={
+                "a2c_loss": a2c_info["loss"],
+            },
             pipeline_stages=[
                 PipelineStage(
-                    loss_names=["a2c_loss"], max_stage_steps=total_training_steps,
+                    loss_names=["a2c_loss"],
+                    max_stage_steps=total_training_steps,
                 ),
             ],
             num_mini_batch=a2c_info["num_mini_batch"],
diff --git a/projects/babyai_baselines/experiments/go_to_local/base.py b/projects/babyai_baselines/experiments/go_to_local/base.py
index d1f3eb4ca..9eee300a7 100644
--- a/projects/babyai_baselines/experiments/go_to_local/base.py
+++ b/projects/babyai_baselines/experiments/go_to_local/base.py
@@ -87,11 +87,13 @@ def _training_pipeline(  # type:ignore
             should_log=cls.SHOULD_LOG,
             pipeline_stages=pipeline_stages,
             named_storages=named_storages,
-            lr_scheduler_builder=Builder(
-                LambdaLR, {"lr_lambda": LinearDecay(steps=total_train_steps)}  # type: ignore
-            )
-            if cls.USE_LR_DECAY
-            else None,
+            lr_scheduler_builder=(
+                Builder(
+                    LambdaLR, {"lr_lambda": LinearDecay(steps=total_train_steps)}  # type: ignore
+                )
+                if cls.USE_LR_DECAY
+                else None
+            ),
         )
 
     @classmethod
diff --git a/projects/babyai_baselines/experiments/go_to_local/bc.py b/projects/babyai_baselines/experiments/go_to_local/bc.py
index c71d6b52e..c42e8c040 100644
--- a/projects/babyai_baselines/experiments/go_to_local/bc.py
+++ b/projects/babyai_baselines/experiments/go_to_local/bc.py
@@ -21,10 +21,13 @@ def training_pipeline(cls, **kwargs):
         imitation_info = cls.rl_loss_default("imitation")
 
         return cls._training_pipeline(
-            named_losses={"imitation_loss": imitation_info["loss"],},
+            named_losses={
+                "imitation_loss": imitation_info["loss"],
+            },
             pipeline_stages=[
                 PipelineStage(
-                    loss_names=["imitation_loss"], max_stage_steps=total_train_steps,
+                    loss_names=["imitation_loss"],
+                    max_stage_steps=total_train_steps,
                 ),
             ],
             num_mini_batch=min(
diff --git a/projects/babyai_baselines/experiments/go_to_local/bc_teacher_forcing.py b/projects/babyai_baselines/experiments/go_to_local/bc_teacher_forcing.py
index 7a70719b9..f79fa15ca 100644
--- a/projects/babyai_baselines/experiments/go_to_local/bc_teacher_forcing.py
+++ b/projects/babyai_baselines/experiments/go_to_local/bc_teacher_forcing.py
@@ -31,12 +31,16 @@ def training_pipeline(cls, **kwargs):
         imitation_info = cls.rl_loss_default("imitation")
 
         return cls._training_pipeline(
-            named_losses={"imitation_loss": imitation_info["loss"],},
+            named_losses={
+                "imitation_loss": imitation_info["loss"],
+            },
             pipeline_stages=[
                 PipelineStage(
                     loss_names=["imitation_loss"],
                     teacher_forcing=LinearDecay(
-                        startp=1.0, endp=1.0, steps=total_train_steps,
+                        startp=1.0,
+                        endp=1.0,
+                        steps=total_train_steps,
                     ),
                     max_stage_steps=total_train_steps,
                 ),
diff --git a/projects/babyai_baselines/experiments/go_to_local/dagger.py b/projects/babyai_baselines/experiments/go_to_local/dagger.py
index 6120380b0..ad0488aa5 100644
--- a/projects/babyai_baselines/experiments/go_to_local/dagger.py
+++ b/projects/babyai_baselines/experiments/go_to_local/dagger.py
@@ -26,7 +26,9 @@ def training_pipeline(cls, **kwargs):
                 PipelineStage(
                     loss_names=["imitation_loss"],
                     teacher_forcing=LinearDecay(
-                        startp=1.0, endp=0.0, steps=total_train_steps // 2,
+                        startp=1.0,
+                        endp=0.0,
+                        steps=total_train_steps // 2,
                     ),
                     max_stage_steps=total_train_steps,
                 )
diff --git a/projects/babyai_baselines/experiments/go_to_local/ppo.py b/projects/babyai_baselines/experiments/go_to_local/ppo.py
index 5199cc5b0..00e713bef 100644
--- a/projects/babyai_baselines/experiments/go_to_local/ppo.py
+++ b/projects/babyai_baselines/experiments/go_to_local/ppo.py
@@ -28,10 +28,13 @@ def training_pipeline(cls, **kwargs):
         ppo_info = cls.rl_loss_default("ppo", steps=total_train_steps)
 
         return cls._training_pipeline(
-            named_losses={"ppo_loss": ppo_info["loss"],},
+            named_losses={
+                "ppo_loss": ppo_info["loss"],
+            },
             pipeline_stages=[
                 PipelineStage(
-                    loss_names=["ppo_loss"], max_stage_steps=total_train_steps,
+                    loss_names=["ppo_loss"],
+                    max_stage_steps=total_train_steps,
                 ),
             ],
             num_mini_batch=ppo_info["num_mini_batch"],
diff --git a/projects/babyai_baselines/experiments/go_to_obj/a2c.py b/projects/babyai_baselines/experiments/go_to_obj/a2c.py
index 78d2394be..60741378c 100644
--- a/projects/babyai_baselines/experiments/go_to_obj/a2c.py
+++ b/projects/babyai_baselines/experiments/go_to_obj/a2c.py
@@ -19,10 +19,13 @@ def training_pipeline(cls, **kwargs):
         a2c_info = cls.rl_loss_default("a2c", steps=total_training_steps)
 
         return cls._training_pipeline(
-            named_losses={"a2c_loss": a2c_info["loss"],},
+            named_losses={
+                "a2c_loss": a2c_info["loss"],
+            },
             pipeline_stages=[
                 PipelineStage(
-                    loss_names=["a2c_loss"], max_stage_steps=total_training_steps,
+                    loss_names=["a2c_loss"],
+                    max_stage_steps=total_training_steps,
                 ),
             ],
             num_mini_batch=a2c_info["num_mini_batch"],
diff --git a/projects/babyai_baselines/experiments/go_to_obj/base.py b/projects/babyai_baselines/experiments/go_to_obj/base.py
index 084b4356a..0aa9f222d 100644
--- a/projects/babyai_baselines/experiments/go_to_obj/base.py
+++ b/projects/babyai_baselines/experiments/go_to_obj/base.py
@@ -61,7 +61,7 @@ def _training_pipeline(  # type:ignore
         metric_accumulate_interval = (
             cls.METRIC_ACCUMULATE_INTERVAL()
         )  # Log every 10 max length tasks
-        save_interval = 2 ** 31
+        save_interval = 2**31
         gamma = 0.99
 
         use_gae = "reinforce_loss" not in named_losses
@@ -83,11 +83,13 @@ def _training_pipeline(  # type:ignore
             advance_scene_rollout_period=None,
             should_log=cls.SHOULD_LOG,
             pipeline_stages=pipeline_stages,
-            lr_scheduler_builder=Builder(
-                LambdaLR, {"lr_lambda": LinearDecay(steps=total_train_steps)}  # type: ignore
-            )
-            if cls.USE_LR_DECAY
-            else None,
+            lr_scheduler_builder=(
+                Builder(
+                    LambdaLR, {"lr_lambda": LinearDecay(steps=total_train_steps)}  # type: ignore
+                )
+                if cls.USE_LR_DECAY
+                else None
+            ),
             **kwargs,
         )
 
diff --git a/projects/babyai_baselines/experiments/go_to_obj/bc.py b/projects/babyai_baselines/experiments/go_to_obj/bc.py
index 12233724b..a5cbdd4ca 100644
--- a/projects/babyai_baselines/experiments/go_to_obj/bc.py
+++ b/projects/babyai_baselines/experiments/go_to_obj/bc.py
@@ -21,10 +21,13 @@ def training_pipeline(cls, **kwargs):
         imitation_info = cls.rl_loss_default("imitation")
 
         return cls._training_pipeline(
-            named_losses={"imitation_loss": imitation_info["loss"],},
+            named_losses={
+                "imitation_loss": imitation_info["loss"],
+            },
             pipeline_stages=[
                 PipelineStage(
-                    loss_names=["imitation_loss"], max_stage_steps=total_train_steps,
+                    loss_names=["imitation_loss"],
+                    max_stage_steps=total_train_steps,
                 ),
             ],
             num_mini_batch=min(
diff --git a/projects/babyai_baselines/experiments/go_to_obj/bc_teacher_forcing.py b/projects/babyai_baselines/experiments/go_to_obj/bc_teacher_forcing.py
index 49c32190d..7b4be27be 100644
--- a/projects/babyai_baselines/experiments/go_to_obj/bc_teacher_forcing.py
+++ b/projects/babyai_baselines/experiments/go_to_obj/bc_teacher_forcing.py
@@ -21,12 +21,16 @@ def training_pipeline(cls, **kwargs):
         imitation_info = cls.rl_loss_default("imitation")
 
         return cls._training_pipeline(
-            named_losses={"imitation_loss": imitation_info["loss"],},
+            named_losses={
+                "imitation_loss": imitation_info["loss"],
+            },
             pipeline_stages=[
                 PipelineStage(
                     loss_names=["imitation_loss"],
                     teacher_forcing=LinearDecay(
-                        startp=1.0, endp=1.0, steps=total_train_steps,
+                        startp=1.0,
+                        endp=1.0,
+                        steps=total_train_steps,
                     ),
                     max_stage_steps=total_train_steps,
                 ),
diff --git a/projects/babyai_baselines/experiments/go_to_obj/dagger.py b/projects/babyai_baselines/experiments/go_to_obj/dagger.py
index 8f97dd06d..54d200cfd 100644
--- a/projects/babyai_baselines/experiments/go_to_obj/dagger.py
+++ b/projects/babyai_baselines/experiments/go_to_obj/dagger.py
@@ -26,7 +26,9 @@ def training_pipeline(cls, **kwargs):
                 PipelineStage(
                     loss_names=["imitation_loss"],
                     teacher_forcing=LinearDecay(
-                        startp=1.0, endp=0.0, steps=total_train_steps // 2,
+                        startp=1.0,
+                        endp=0.0,
+                        steps=total_train_steps // 2,
                     ),
                     max_stage_steps=total_train_steps,
                 )
diff --git a/projects/babyai_baselines/experiments/go_to_obj/ppo.py b/projects/babyai_baselines/experiments/go_to_obj/ppo.py
index ce7d6b8ad..ce418a110 100644
--- a/projects/babyai_baselines/experiments/go_to_obj/ppo.py
+++ b/projects/babyai_baselines/experiments/go_to_obj/ppo.py
@@ -17,10 +17,13 @@ def training_pipeline(cls, **kwargs):
         ppo_info = cls.rl_loss_default("ppo", steps=total_train_steps)
 
         return cls._training_pipeline(
-            named_losses={"ppo_loss": ppo_info["loss"],},
+            named_losses={
+                "ppo_loss": ppo_info["loss"],
+            },
             pipeline_stages=[
                 PipelineStage(
-                    loss_names=["ppo_loss"], max_stage_steps=total_train_steps,
+                    loss_names=["ppo_loss"],
+                    max_stage_steps=total_train_steps,
                 ),
             ],
             num_mini_batch=ppo_info["num_mini_batch"],
diff --git a/projects/gym_baselines/experiments/gym_mujoco_ddppo.py b/projects/gym_baselines/experiments/gym_mujoco_ddppo.py
index d20ec4c69..01cf64295 100644
--- a/projects/gym_baselines/experiments/gym_mujoco_ddppo.py
+++ b/projects/gym_baselines/experiments/gym_mujoco_ddppo.py
@@ -57,6 +57,7 @@ def training_pipeline(cls, **kwargs) -> TrainingPipeline:
             save_interval=save_interval,
             metric_accumulate_interval=metric_accumulate_interval,
             lr_scheduler_builder=Builder(
-                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps, startp=1, endp=0)},
+                LambdaLR,
+                {"lr_lambda": LinearDecay(steps=ppo_steps, startp=1, endp=0)},
             ),
         )
diff --git a/projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_thor_base.py b/projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_thor_base.py
index ab39333f7..2da82b6b5 100644
--- a/projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_thor_base.py
+++ b/projects/manipulathor_baselines/armpointnav_baselines/experiments/armpointnav_thor_base.py
@@ -109,9 +109,9 @@ def machine_params(self, mode="train", **kwargs):
         return MachineParams(
             nprocesses=nprocesses,
             devices=gpu_ids,
-            sampler_devices=sampler_devices
-            if mode == "train"
-            else gpu_ids,  # ignored with > 1 gpu_ids
+            sampler_devices=(
+                sampler_devices if mode == "train" else gpu_ids
+            ),  # ignored with > 1 gpu_ids
             sensor_preprocessor_graph=sensor_preprocessor_graph,
         )
 
diff --git a/projects/manipulathor_baselines/armpointnav_baselines/models/arm_pointnav_models.py b/projects/manipulathor_baselines/armpointnav_baselines/models/arm_pointnav_models.py
index e8845e548..30c925feb 100644
--- a/projects/manipulathor_baselines/armpointnav_baselines/models/arm_pointnav_models.py
+++ b/projects/manipulathor_baselines/armpointnav_baselines/models/arm_pointnav_models.py
@@ -2,6 +2,7 @@
 
 Arm Point Navigation is currently available as a Task in ManipulaTHOR.
 """
+
 from typing import Tuple, Optional
 
 import gym
diff --git a/projects/manipulathor_baselines/armpointnav_baselines/models/disjoint_arm_pointnav_models.py b/projects/manipulathor_baselines/armpointnav_baselines/models/disjoint_arm_pointnav_models.py
index 093d93e8b..2ffc3e959 100644
--- a/projects/manipulathor_baselines/armpointnav_baselines/models/disjoint_arm_pointnav_models.py
+++ b/projects/manipulathor_baselines/armpointnav_baselines/models/disjoint_arm_pointnav_models.py
@@ -2,6 +2,7 @@
 
 Arm Point Navigation is currently available as a Task in ManipulaTHOR.
 """
+
 from typing import Tuple, Optional
 
 import gym
diff --git a/projects/manipulathor_baselines/armpointnav_baselines/models/manipulathor_net_utils.py b/projects/manipulathor_baselines/armpointnav_baselines/models/manipulathor_net_utils.py
index 1f2046cc3..03a328b31 100644
--- a/projects/manipulathor_baselines/armpointnav_baselines/models/manipulathor_net_utils.py
+++ b/projects/manipulathor_baselines/armpointnav_baselines/models/manipulathor_net_utils.py
@@ -10,7 +10,7 @@ def upshuffle(
     return nn.Sequential(
         nn.Conv2d(
             in_planes,
-            out_planes * upscale_factor ** 2,
+            out_planes * upscale_factor**2,
             kernel_size=kernel_size,
             stride=stride,
             padding=padding,
@@ -26,7 +26,7 @@ def upshufflenorelu(
     return nn.Sequential(
         nn.Conv2d(
             in_planes,
-            out_planes * upscale_factor ** 2,
+            out_planes * upscale_factor**2,
             kernel_size=kernel_size,
             stride=stride,
             padding=padding,
@@ -55,22 +55,31 @@ def conv2d_block(in_planes, out_planes, kernel_size, stride=1, padding=1):
 
 def combine_block_w_do(in_planes, out_planes, dropout=0.0):
     return nn.Sequential(
-        nn.Conv2d(in_planes, out_planes, 1, 1), nn.LeakyReLU(), nn.Dropout(dropout),
+        nn.Conv2d(in_planes, out_planes, 1, 1),
+        nn.LeakyReLU(),
+        nn.Dropout(dropout),
     )
 
 
 def combine_block_no_do(in_planes, out_planes):
-    return nn.Sequential(nn.Conv2d(in_planes, out_planes, 1, 1), nn.LeakyReLU(),)
+    return nn.Sequential(
+        nn.Conv2d(in_planes, out_planes, 1, 1),
+        nn.LeakyReLU(),
+    )
 
 
 def linear_block(in_features, out_features, dropout=0.0):
     return nn.Sequential(
-        nn.Linear(in_features, out_features), nn.LeakyReLU(), nn.Dropout(dropout),
+        nn.Linear(in_features, out_features),
+        nn.LeakyReLU(),
+        nn.Dropout(dropout),
     )
 
 
 def linear_block_norelu(in_features, out_features):
-    return nn.Sequential(nn.Linear(in_features, out_features),)
+    return nn.Sequential(
+        nn.Linear(in_features, out_features),
+    )
 
 
 def input_embedding_net(list_of_feature_sizes, dropout=0.0):
diff --git a/projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo_increasingrollouts.py b/projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo_increasingrollouts.py
index af57c5079..0246d18ee 100644
--- a/projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo_increasingrollouts.py
+++ b/projects/objectnav_baselines/experiments/habitat/clip/objectnav_habitat_rgb_clipresnet50gru_ddppo_increasingrollouts.py
@@ -82,14 +82,16 @@ def training_pipeline(self, **kwargs) -> TrainingPipeline:
                     loss_names=list(named_losses.keys()),
                     max_stage_steps=batch_steps_1,
                     training_settings=TrainingSettings(
-                        num_steps=64, metric_accumulate_interval=log_interval_med,
+                        num_steps=64,
+                        metric_accumulate_interval=log_interval_med,
                     ),
                 ),
                 PipelineStage(
                     loss_names=list(named_losses.keys()),
                     max_stage_steps=batch_steps_2,
                     training_settings=TrainingSettings(
-                        num_steps=128, metric_accumulate_interval=log_interval_large,
+                        num_steps=128,
+                        metric_accumulate_interval=log_interval_large,
                     ),
                 ),
             ],
diff --git a/projects/objectnav_baselines/experiments/habitat/objectnav_habitat_base.py b/projects/objectnav_baselines/experiments/habitat/objectnav_habitat_base.py
index 61db45665..30209a0af 100644
--- a/projects/objectnav_baselines/experiments/habitat/objectnav_habitat_base.py
+++ b/projects/objectnav_baselines/experiments/habitat/objectnav_habitat_base.py
@@ -428,7 +428,9 @@ def machine_params(self, mode="train", **kwargs):
 
     def make_sampler_fn(self, **kwargs) -> TaskSampler:
         return ObjectNavTaskSampler(
-            task_kwargs={"look_constraints": self.look_constraints,},
+            task_kwargs={
+                "look_constraints": self.look_constraints,
+            },
             **{"failed_end_reward": self.FAILED_END_REWARD, **kwargs},  # type: ignore
         )
 
diff --git a/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_depth_resnet18gru_ddppo.py b/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_depth_resnet18gru_ddppo.py
index 70bfdfc05..6ce2d0fc1 100644
--- a/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_depth_resnet18gru_ddppo.py
+++ b/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_depth_resnet18gru_ddppo.py
@@ -26,7 +26,9 @@ class ObjectNaviThorDepthPPOExperimentConfig(ObjectNaviThorBaseConfig):
             use_normalization=True,
             uuid="depth_lowres",
         ),
-        GoalObjectTypeThorSensor(object_types=ObjectNaviThorBaseConfig.TARGET_TYPES,),
+        GoalObjectTypeThorSensor(
+            object_types=ObjectNaviThorBaseConfig.TARGET_TYPES,
+        ),
     )
 
     def __init__(self, **kwargs):
diff --git a/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_rgb_resnet18gru_ddppo.py b/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_rgb_resnet18gru_ddppo.py
index 016a2ba57..40f168d82 100644
--- a/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_rgb_resnet18gru_ddppo.py
+++ b/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_rgb_resnet18gru_ddppo.py
@@ -28,7 +28,9 @@ class ObjectNaviThorRGBPPOExperimentConfig(ObjectNaviThorBaseConfig):
             use_resnet_normalization=True,
             uuid="rgb_lowres",
         ),
-        GoalObjectTypeThorSensor(object_types=ObjectNaviThorBaseConfig.TARGET_TYPES,),
+        GoalObjectTypeThorSensor(
+            object_types=ObjectNaviThorBaseConfig.TARGET_TYPES,
+        ),
     ]
 
     def __init__(self, **kwargs):
diff --git a/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_rgbd_resnet18gru_ddppo.py b/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_rgbd_resnet18gru_ddppo.py
index 7d3722589..ffb1b7aaf 100644
--- a/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_rgbd_resnet18gru_ddppo.py
+++ b/projects/objectnav_baselines/experiments/ithor/objectnav_ithor_rgbd_resnet18gru_ddppo.py
@@ -35,7 +35,9 @@ class ObjectNaviThorRGBDPPOExperimentConfig(ObjectNaviThorBaseConfig):
             use_normalization=True,
             uuid="depth_lowres",
         ),
-        GoalObjectTypeThorSensor(object_types=ObjectNaviThorBaseConfig.TARGET_TYPES,),
+        GoalObjectTypeThorSensor(
+            object_types=ObjectNaviThorBaseConfig.TARGET_TYPES,
+        ),
     ]
 
     def __init__(self, **kwargs):
diff --git a/projects/objectnav_baselines/experiments/objectnav_thor_base.py b/projects/objectnav_baselines/experiments/objectnav_thor_base.py
index 13e925271..710ff3960 100644
--- a/projects/objectnav_baselines/experiments/objectnav_thor_base.py
+++ b/projects/objectnav_baselines/experiments/objectnav_thor_base.py
@@ -101,9 +101,9 @@ def env_args(self):
         return dict(
             width=self.CAMERA_WIDTH,
             height=self.CAMERA_HEIGHT,
-            commit_id=self.THOR_COMMIT_ID
-            if not self.headless
-            else ai2thor.build.COMMIT_ID,
+            commit_id=(
+                self.THOR_COMMIT_ID if not self.headless else ai2thor.build.COMMIT_ID
+            ),
             stochastic=True,
             continuousMode=True,
             applyActionNoise=self.STOCHASTIC,
@@ -174,9 +174,9 @@ def machine_params(self, mode="train", **kwargs):
         return MachineParams(
             nprocesses=nprocesses,
             devices=devices,
-            sampler_devices=sampler_devices
-            if mode == "train"
-            else devices,  # ignored with > 1 gpu_ids
+            sampler_devices=(
+                sampler_devices if mode == "train" else devices
+            ),  # ignored with > 1 gpu_ids
             sensor_preprocessor_graph=sensor_preprocessor_graph,
         )
 
diff --git a/projects/objectnav_baselines/experiments/robothor/beta/objectnav_robothor_rgb_unfrozenresnet18gru_vdr_ddppo.py b/projects/objectnav_baselines/experiments/robothor/beta/objectnav_robothor_rgb_unfrozenresnet18gru_vdr_ddppo.py
index 724945e75..5d19d091e 100644
--- a/projects/objectnav_baselines/experiments/robothor/beta/objectnav_robothor_rgb_unfrozenresnet18gru_vdr_ddppo.py
+++ b/projects/objectnav_baselines/experiments/robothor/beta/objectnav_robothor_rgb_unfrozenresnet18gru_vdr_ddppo.py
@@ -41,7 +41,9 @@
 
 
 def compute_inv_dyn_action_logits(
-    model, img0, img1,
+    model,
+    img0,
+    img1,
 ):
     rgb_uuid = model.visual_encoder.rgb_uuid
     img0_enc = model.visual_encoder({rgb_uuid: img0.unsqueeze(0)}).squeeze(0)
@@ -216,7 +218,8 @@ def training_pipeline(self, **kwargs):
                             storage_uuid="discrete_vdr",
                             loss_names=["inv_dyn_vdr"],
                             training_settings=TrainingSettings(
-                                num_mini_batch=1, update_repeats=1,
+                                num_mini_batch=1,
+                                update_repeats=1,
                             ),
                         ),
                     ],
@@ -230,7 +233,9 @@ def training_pipeline(self, **kwargs):
     def create_model(self, **kwargs) -> nn.Module:
         model = self.model_creation_handler.create_model(**kwargs)
         model.inv_dyn_mlp = nn.Sequential(
-            nn.Linear(1024, 256), nn.ReLU(inplace=True), nn.Linear(256, 6),
+            nn.Linear(1024, 256),
+            nn.ReLU(inplace=True),
+            nn.Linear(256, 6),
         )
         return model
 
diff --git a/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnet18gru_dagger.py b/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnet18gru_dagger.py
index 0d384cefd..2fa9fd104 100644
--- a/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnet18gru_dagger.py
+++ b/projects/objectnav_baselines/experiments/robothor/objectnav_robothor_rgb_resnet18gru_dagger.py
@@ -33,7 +33,9 @@ class ObjectNavRoboThorRGBDAggerExperimentConfig(ObjectNavRoboThorBaseConfig):
         GoalObjectTypeThorSensor(
             object_types=ObjectNavRoboThorBaseConfig.TARGET_TYPES,
         ),
-        ExpertActionSensor(nactions=len(ObjectNavTask.class_action_names()),),
+        ExpertActionSensor(
+            nactions=len(ObjectNavTask.class_action_names()),
+        ),
     ]
 
     def __init__(self, **kwargs):
diff --git a/projects/objectnav_baselines/mixins.py b/projects/objectnav_baselines/mixins.py
index b07b0a3b7..11d4abc9c 100644
--- a/projects/objectnav_baselines/mixins.py
+++ b/projects/objectnav_baselines/mixins.py
@@ -120,9 +120,9 @@ def create_model(self, **kwargs) -> nn.Module:
             observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces,
             goal_sensor_uuid=goal_sensor_uuid,
             rgb_resnet_preprocessor_uuid="rgb_resnet_imagenet" if has_rgb else None,
-            depth_resnet_preprocessor_uuid="depth_resnet_imagenet"
-            if has_depth
-            else None,
+            depth_resnet_preprocessor_uuid=(
+                "depth_resnet_imagenet" if has_depth else None
+            ),
             hidden_size=512,
             goal_dims=32,
         )
@@ -154,9 +154,9 @@ def create_model(self, **kwargs) -> nn.Module:
             rgb_uuid=rgb_uuid,
             depth_uuid=depth_uuid,
             goal_sensor_uuid=goal_sensor_uuid,
-            hidden_size=192
-            if self.multiple_beliefs and len(self.auxiliary_uuids) > 1
-            else 512,
+            hidden_size=(
+                192 if self.multiple_beliefs and len(self.auxiliary_uuids) > 1 else 512
+            ),
             backbone=self.backbone,
             resnet_baseplanes=32,
             object_type_embedding_dim=32,
@@ -199,7 +199,9 @@ def training_pipeline(
             update_repeats=update_repeats,
             max_grad_norm=max_grad_norm,
             num_steps=num_steps,
-            named_losses={"imitation_loss": Imitation(),},
+            named_losses={
+                "imitation_loss": Imitation(),
+            },
             gamma=gamma,
             use_gae=use_gae,
             gae_lambda=gae_lambda,
@@ -208,18 +210,25 @@ def training_pipeline(
                 PipelineStage(
                     loss_names=["imitation_loss"],
                     max_stage_steps=tf_steps,
-                    teacher_forcing=LinearDecay(startp=1.0, endp=1.0, steps=tf_steps,),
+                    teacher_forcing=LinearDecay(
+                        startp=1.0,
+                        endp=1.0,
+                        steps=tf_steps,
+                    ),
                 ),
                 PipelineStage(
                     loss_names=["imitation_loss"],
                     max_stage_steps=anneal_steps + il_no_tf_steps,
                     teacher_forcing=LinearDecay(
-                        startp=1.0, endp=0.0, steps=anneal_steps,
+                        startp=1.0,
+                        endp=0.0,
+                        steps=anneal_steps,
                     ),
                 ),
             ],
             lr_scheduler_builder=Builder(
-                LambdaLR, {"lr_lambda": LinearDecay(steps=training_steps)},
+                LambdaLR,
+                {"lr_lambda": LinearDecay(steps=training_steps)},
             ),
         )
 
@@ -236,54 +245,76 @@ def update_with_auxiliary_losses(
     total_aux_losses: Dict[str, Tuple[AbstractActorCriticLoss, float]] = {
         InverseDynamicsLoss.UUID: (
             InverseDynamicsLoss(
-                subsample_rate=0.2, subsample_min_num=10,  # TODO: test its effects
+                subsample_rate=0.2,
+                subsample_min_num=10,  # TODO: test its effects
             ),
             0.05 * aux_loss_total_weight,  # should times 2
         ),
         TemporalDistanceLoss.UUID: (
             TemporalDistanceLoss(
-                num_pairs=8, epsiode_len_min=5,  # TODO: test its effects
+                num_pairs=8,
+                epsiode_len_min=5,  # TODO: test its effects
             ),
             0.2 * aux_loss_total_weight,  # should times 2
         ),
         CPCA1Loss.UUID: (
-            CPCA1Loss(subsample_rate=0.2,),  # TODO: test its effects
+            CPCA1Loss(
+                subsample_rate=0.2,
+            ),  # TODO: test its effects
             0.05 * aux_loss_total_weight,  # should times 2
         ),
         CPCA2Loss.UUID: (
-            CPCA2Loss(subsample_rate=0.2,),  # TODO: test its effects
+            CPCA2Loss(
+                subsample_rate=0.2,
+            ),  # TODO: test its effects
             0.05 * aux_loss_total_weight,  # should times 2
         ),
         CPCA4Loss.UUID: (
-            CPCA4Loss(subsample_rate=0.2,),  # TODO: test its effects
+            CPCA4Loss(
+                subsample_rate=0.2,
+            ),  # TODO: test its effects
             0.05 * aux_loss_total_weight,  # should times 2
         ),
         CPCA8Loss.UUID: (
-            CPCA8Loss(subsample_rate=0.2,),  # TODO: test its effects
+            CPCA8Loss(
+                subsample_rate=0.2,
+            ),  # TODO: test its effects
             0.05 * aux_loss_total_weight,  # should times 2
         ),
         CPCA16Loss.UUID: (
-            CPCA16Loss(subsample_rate=0.2,),  # TODO: test its effects
+            CPCA16Loss(
+                subsample_rate=0.2,
+            ),  # TODO: test its effects
             0.05 * aux_loss_total_weight,  # should times 2
         ),
         CPCA1SoftMaxLoss.UUID: (
-            CPCA1SoftMaxLoss(subsample_rate=1.0,),
+            CPCA1SoftMaxLoss(
+                subsample_rate=1.0,
+            ),
             0.05 * aux_loss_total_weight,  # should times 2
         ),
         CPCA2SoftMaxLoss.UUID: (
-            CPCA2SoftMaxLoss(subsample_rate=1.0,),
+            CPCA2SoftMaxLoss(
+                subsample_rate=1.0,
+            ),
             0.05 * aux_loss_total_weight,  # should times 2
         ),
         CPCA4SoftMaxLoss.UUID: (
-            CPCA4SoftMaxLoss(subsample_rate=1.0,),
+            CPCA4SoftMaxLoss(
+                subsample_rate=1.0,
+            ),
             0.05 * aux_loss_total_weight,  # should times 2
         ),
         CPCA8SoftMaxLoss.UUID: (
-            CPCA8SoftMaxLoss(subsample_rate=1.0,),
+            CPCA8SoftMaxLoss(
+                subsample_rate=1.0,
+            ),
             0.05 * aux_loss_total_weight,  # should times 2
         ),
         CPCA16SoftMaxLoss.UUID: (
-            CPCA16SoftMaxLoss(subsample_rate=1.0,),
+            CPCA16SoftMaxLoss(
+                subsample_rate=1.0,
+            ),
             0.05 * aux_loss_total_weight,  # should times 2
         ),
     }
@@ -353,9 +384,9 @@ def training_pipeline(
                     loss_weights=[val[1] for val in named_losses.values()],
                 )
             ],
-            lr_scheduler_builder=Builder(
-                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
-            )
-            if anneal_lr
-            else None,
+            lr_scheduler_builder=(
+                Builder(LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)})
+                if anneal_lr
+                else None
+            ),
         )
diff --git a/projects/pointnav_baselines/experiments/pointnav_thor_base.py b/projects/pointnav_baselines/experiments/pointnav_thor_base.py
index 945761f30..203d3cc08 100644
--- a/projects/pointnav_baselines/experiments/pointnav_thor_base.py
+++ b/projects/pointnav_baselines/experiments/pointnav_thor_base.py
@@ -107,9 +107,9 @@ def machine_params(self, mode="train", **kwargs):
         return MachineParams(
             nprocesses=nprocesses,
             devices=gpu_ids,
-            sampler_devices=sampler_devices
-            if mode == "train"
-            else gpu_ids,  # ignored with > 1 gpu_ids
+            sampler_devices=(
+                sampler_devices if mode == "train" else gpu_ids
+            ),  # ignored with > 1 gpu_ids
             sensor_preprocessor_graph=sensor_preprocessor_graph,
         )
 
@@ -186,7 +186,10 @@ def _get_sampler_args_for_scene_split(
             "seed": seeds[process_ind] if seeds is not None else None,
             "deterministic_cudnn": deterministic_cudnn,
             "rewards_config": self.REWARD_CONFIG,
-            "env_args": {**self.ENV_ARGS, "x_display": x_display,},
+            "env_args": {
+                **self.ENV_ARGS,
+                "x_display": x_display,
+            },
         }
 
     def train_task_sampler_args(
diff --git a/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_depth_simpleconvgru_ddppo.py b/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_depth_simpleconvgru_ddppo.py
index a767b2fd4..11ced40db 100644
--- a/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_depth_simpleconvgru_ddppo.py
+++ b/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_depth_simpleconvgru_ddppo.py
@@ -14,7 +14,9 @@
 from projects.pointnav_baselines.mixins import PointNavPPOMixin
 
 
-class PointNavRoboThorRGBPPOExperimentConfig(PointNavRoboThorBaseConfig,):
+class PointNavRoboThorRGBPPOExperimentConfig(
+    PointNavRoboThorBaseConfig,
+):
     """An Point Navigation experiment configuration in RoboTHOR with Depth
     input."""
 
diff --git a/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgb_simpleconvgru_ddppo.py b/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgb_simpleconvgru_ddppo.py
index 27e935318..f1f831727 100644
--- a/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgb_simpleconvgru_ddppo.py
+++ b/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgb_simpleconvgru_ddppo.py
@@ -10,7 +10,9 @@
 from projects.pointnav_baselines.mixins import PointNavPPOMixin
 
 
-class PointNavRoboThorRGBPPOExperimentConfig(PointNavRoboThorBaseConfig,):
+class PointNavRoboThorRGBPPOExperimentConfig(
+    PointNavRoboThorBaseConfig,
+):
     """An Point Navigation experiment configuration in RoboThor with RGB
     input."""
 
diff --git a/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgbd_simpleconvgru_ddppo.py b/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgbd_simpleconvgru_ddppo.py
index ab236fbe6..5122ac08c 100644
--- a/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgbd_simpleconvgru_ddppo.py
+++ b/projects/pointnav_baselines/experiments/robothor/pointnav_robothor_rgbd_simpleconvgru_ddppo.py
@@ -12,7 +12,9 @@
 from projects.pointnav_baselines.mixins import PointNavPPOMixin
 
 
-class PointNavRoboThorRGBPPOExperimentConfig(PointNavRoboThorBaseConfig,):
+class PointNavRoboThorRGBPPOExperimentConfig(
+    PointNavRoboThorBaseConfig,
+):
     """An Point Navigation experiment configuration in RoboThor with RGBD
     input."""
 
diff --git a/projects/pointnav_baselines/mixins.py b/projects/pointnav_baselines/mixins.py
index 1ba329f21..b4e911aff 100644
--- a/projects/pointnav_baselines/mixins.py
+++ b/projects/pointnav_baselines/mixins.py
@@ -68,9 +68,9 @@ def create_model(self, **kwargs) -> nn.Module:
             depth_uuid=depth_uuid,
             goal_sensor_uuid=goal_sensor_uuid,
             # RNN
-            hidden_size=228
-            if self.multiple_beliefs and len(self.auxiliary_uuids) > 1
-            else 512,
+            hidden_size=(
+                228 if self.multiple_beliefs and len(self.auxiliary_uuids) > 1 else 512
+            ),
             num_rnn_layers=1,
             rnn_type="GRU",
             add_prev_actions=self.add_prev_actions,
diff --git a/projects/tutorials/distributed_objectnav_tutorial.py b/projects/tutorials/distributed_objectnav_tutorial.py
index 24d350826..a4a265817 100644
--- a/projects/tutorials/distributed_objectnav_tutorial.py
+++ b/projects/tutorials/distributed_objectnav_tutorial.py
@@ -210,11 +210,23 @@ def lr_scheduler(small_batch_steps, transition_steps, ppo_steps, lr_scaling):
         return MultiLinearDecay(
             [
                 # Base learning rate phase for small batch (with linear decay towards 0)
-                LinearDecay(steps=safe_small_batch_steps, startp=1.0, endp=break1,),
+                LinearDecay(
+                    steps=safe_small_batch_steps,
+                    startp=1.0,
+                    endp=break1,
+                ),
                 # Allow the optimizer to adapt its statistics to the changes with a larger learning rate
-                LinearDecay(steps=transition_steps, startp=break1, endp=break2,),
+                LinearDecay(
+                    steps=transition_steps,
+                    startp=break1,
+                    endp=break2,
+                ),
                 # Scaled learning rate phase for large batch (with linear decay towards 0)
-                LinearDecay(steps=large_batch_and_lr_steps, startp=break2, endp=0,),
+                LinearDecay(
+                    steps=large_batch_and_lr_steps,
+                    startp=break2,
+                    endp=0,
+                ),
             ]
         )
 
diff --git a/projects/tutorials/gym_mujoco_tutorial.py b/projects/tutorials/gym_mujoco_tutorial.py
index 9120ca65b..96a74ed8f 100644
--- a/projects/tutorials/gym_mujoco_tutorial.py
+++ b/projects/tutorials/gym_mujoco_tutorial.py
@@ -299,7 +299,8 @@ def training_pipeline(cls, **kwargs) -> TrainingPipeline:
             save_interval=save_interval,
             metric_accumulate_interval=metric_accumulate_interval,
             lr_scheduler_builder=Builder(
-                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps, startp=1, endp=0)},
+                LambdaLR,
+                {"lr_lambda": LinearDecay(steps=ppo_steps, startp=1, endp=0)},
             ),
         )
 
diff --git a/projects/tutorials/gym_tutorial.py b/projects/tutorials/gym_tutorial.py
index 06366a6c7..08e4cf092 100644
--- a/projects/tutorials/gym_tutorial.py
+++ b/projects/tutorials/gym_tutorial.py
@@ -258,7 +258,11 @@ def training_pipeline(cls, **kwargs) -> TrainingPipeline:
         ppo_steps = int(1.2e6)
         return TrainingPipeline(
             named_losses=dict(
-                ppo_loss=PPO(clip_param=0.2, value_loss_coef=0.5, entropy_coef=0.0,),
+                ppo_loss=PPO(
+                    clip_param=0.2,
+                    value_loss_coef=0.5,
+                    entropy_coef=0.0,
+                ),
             ),  # type:ignore
             pipeline_stages=[
                 PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps),
@@ -275,7 +279,8 @@ def training_pipeline(cls, **kwargs) -> TrainingPipeline:
             save_interval=200000,
             metric_accumulate_interval=50000,
             lr_scheduler_builder=Builder(
-                LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)},  # type:ignore
+                LambdaLR,
+                {"lr_lambda": LinearDecay(steps=ppo_steps)},  # type:ignore
             ),
         )
 
diff --git a/projects/tutorials/minigrid_tutorial.py b/projects/tutorials/minigrid_tutorial.py
index 794a4e496..cd4dd8600 100644
--- a/projects/tutorials/minigrid_tutorial.py
+++ b/projects/tutorials/minigrid_tutorial.py
@@ -95,6 +95,7 @@ class implementing the `ExperimentConfig` abstraction. For this tutorial, we wil
 these classes do.  
 """
 
+
 # %%
 class MiniGridTutorialExperimentConfig(ExperimentConfig):
 
diff --git a/projects/tutorials/minigrid_tutorial_conds.py b/projects/tutorials/minigrid_tutorial_conds.py
index e0f933e1e..f2689e1b3 100644
--- a/projects/tutorials/minigrid_tutorial_conds.py
+++ b/projects/tutorials/minigrid_tutorial_conds.py
@@ -121,7 +121,11 @@ def forward(self, observations, memory, prev_actions, masks):
 
         # noinspection PyArgumentList
         return (
-            ActorCriticOutput(distributions=dists, values=values, extras={},),
+            ActorCriticOutput(
+                distributions=dists,
+                values=values,
+                extras={},
+            ),
             None,
         )
 
@@ -469,7 +473,9 @@ def training_pipeline(cls, **kwargs) -> TrainingPipeline:
             pipeline_stages=[
                 PipelineStage(
                     teacher_forcing=LinearDecay(
-                        startp=1.0, endp=0.0, steps=ppo_steps // 2,
+                        startp=1.0,
+                        endp=0.0,
+                        steps=ppo_steps // 2,
                     ),
                     loss_names=["imitation_loss", "ppo_loss"],
                     max_stage_steps=ppo_steps,
diff --git a/projects/tutorials/navtopartner_robothor_rgb_ppo.py b/projects/tutorials/navtopartner_robothor_rgb_ppo.py
index b7f9c228a..83d6a3f28 100644
--- a/projects/tutorials/navtopartner_robothor_rgb_ppo.py
+++ b/projects/tutorials/navtopartner_robothor_rgb_ppo.py
@@ -258,9 +258,11 @@ def train_task_sampler_args(
         )
         res["env_args"] = {
             **self.ENV_ARGS,
-            "x_display": ("0.%d" % devices[process_ind % len(devices)])
-            if devices is not None and len(devices) > 0
-            else None,
+            "x_display": (
+                ("0.%d" % devices[process_ind % len(devices)])
+                if devices is not None and len(devices) > 0
+                else None
+            ),
         }
         return res
 
@@ -283,9 +285,11 @@ def valid_task_sampler_args(
         )
         res["env_args"] = {
             **self.ENV_ARGS,
-            "x_display": ("0.%d" % devices[process_ind % len(devices)])
-            if devices is not None and len(devices) > 0
-            else None,
+            "x_display": (
+                ("0.%d" % devices[process_ind % len(devices)])
+                if devices is not None and len(devices) > 0
+                else None
+            ),
         }
         res["max_tasks"] = 20
         return res
@@ -309,9 +313,11 @@ def test_task_sampler_args(
         )
         res["env_args"] = {
             **self.ENV_ARGS,
-            "x_display": ("0.%d" % devices[process_ind % len(devices)])
-            if devices is not None and len(devices) > 0
-            else None,
+            "x_display": (
+                ("0.%d" % devices[process_ind % len(devices)])
+                if devices is not None and len(devices) > 0
+                else None
+            ),
         }
         res["max_tasks"] = 4
         return res
diff --git a/projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object.py b/projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object.py
index d7e19be0e..4586a7e0f 100644
--- a/projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object.py
+++ b/projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object.py
@@ -69,11 +69,16 @@ def training_pipeline(cls, **kwargs):
                 PipelineStage(
                     loss_names=["imitation_loss"],
                     teacher_forcing=LinearDecay(
-                        startp=1.0, endp=0.0, steps=dagger_steos,
+                        startp=1.0,
+                        endp=0.0,
+                        steps=dagger_steos,
                     ),
                     max_stage_steps=dagger_steos,
                 ),
-                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps,),
+                PipelineStage(
+                    loss_names=["ppo_loss"],
+                    max_stage_steps=ppo_steps,
+                ),
             ],
             lr_scheduler_builder=Builder(
                 LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
diff --git a/projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object_viz.py b/projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object_viz.py
index 1a6c0f536..979f305c9 100644
--- a/projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object_viz.py
+++ b/projects/tutorials/object_nav_ithor_dagger_then_ppo_one_object_viz.py
@@ -34,14 +34,17 @@ def get_viz(self, mode):
         self.viz = VizSuite(
             mode=mode,
             base_trajectory=TrajectoryViz(
-                path_to_target_location=None, path_to_rot_degrees=("rotation",),
+                path_to_target_location=None,
+                path_to_rot_degrees=("rotation",),
             ),
             egeocentric=AgentViewViz(max_video_length=100),
             action_probs=ActorViz(figsize=(3.25, 10), fontsize=18),
             taken_action_logprobs=TensorViz1D(),
             episode_mask=TensorViz1D(rollout_source=("masks",)),
             thor_trajectory=ThorViz(
-                path_to_target_location=None, figsize=(8, 8), viz_rows_cols=(448, 448),
+                path_to_target_location=None,
+                figsize=(8, 8),
+                viz_rows_cols=(448, 448),
             ),
         )
 
diff --git a/projects/tutorials/object_nav_ithor_ppo_one_object.py b/projects/tutorials/object_nav_ithor_ppo_one_object.py
index 5efce36cb..973a36c3a 100644
--- a/projects/tutorials/object_nav_ithor_ppo_one_object.py
+++ b/projects/tutorials/object_nav_ithor_ppo_one_object.py
@@ -45,7 +45,9 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig):
     SCREEN_SIZE = 224
     SENSORS = [
         RGBSensorThor(
-            height=SCREEN_SIZE, width=SCREEN_SIZE, use_resnet_normalization=True,
+            height=SCREEN_SIZE,
+            width=SCREEN_SIZE,
+            use_resnet_normalization=True,
         ),
         GoalObjectTypeThorSensor(object_types=OBJECT_TYPES),
     ]
@@ -95,7 +97,10 @@ def training_pipeline(cls, **kwargs):
             gae_lambda=gae_lambda,
             advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD,
             pipeline_stages=[
-                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps,),
+                PipelineStage(
+                    loss_names=["ppo_loss"],
+                    max_stage_steps=ppo_steps,
+                ),
             ],
             lr_scheduler_builder=Builder(
                 LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
@@ -119,7 +124,10 @@ def machine_params(cls, mode="train", **kwargs):
         else:
             raise NotImplementedError("mode must be 'train', 'valid', or 'test'.")
 
-        return MachineParams(nprocesses=nprocesses, devices=gpu_ids,)
+        return MachineParams(
+            nprocesses=nprocesses,
+            devices=gpu_ids,
+        )
 
     @classmethod
     def create_model(cls, **kwargs) -> nn.Module:
diff --git a/projects/tutorials/pointnav_habitat_rgb_ddppo.py b/projects/tutorials/pointnav_habitat_rgb_ddppo.py
index c66ddeda5..35ea05822 100644
--- a/projects/tutorials/pointnav_habitat_rgb_ddppo.py
+++ b/projects/tutorials/pointnav_habitat_rgb_ddppo.py
@@ -104,7 +104,9 @@ class PointNavHabitatRGBPPOTutorialExperimentConfig(ExperimentConfig):
 
     SENSORS = [
         RGBSensorHabitat(
-            height=SCREEN_SIZE, width=SCREEN_SIZE, use_resnet_normalization=True,
+            height=SCREEN_SIZE,
+            width=SCREEN_SIZE,
+            use_resnet_normalization=True,
         ),
         TargetCoordinatesSensorHabitat(coordinate_dims=2),
     ]
diff --git a/projects/tutorials/pointnav_ithor_rgb_ddppo.py b/projects/tutorials/pointnav_ithor_rgb_ddppo.py
index d18660c47..7f5c54561 100644
--- a/projects/tutorials/pointnav_ithor_rgb_ddppo.py
+++ b/projects/tutorials/pointnav_ithor_rgb_ddppo.py
@@ -182,9 +182,9 @@ def machine_params(self, mode="train", **kwargs):
         return MachineParams(
             nprocesses=nprocesses,
             devices=gpu_ids,
-            sampler_devices=sampler_devices
-            if mode == "train"
-            else gpu_ids,  # ignored with > 1 gpu_ids
+            sampler_devices=(
+                sampler_devices if mode == "train" else gpu_ids
+            ),  # ignored with > 1 gpu_ids
             sensor_preprocessor_graph=sensor_preprocessor_graph,
         )
 
diff --git a/projects/tutorials/running_inference_tutorial.py b/projects/tutorials/running_inference_tutorial.py
index e928505bc..f40a2d98e 100644
--- a/projects/tutorials/running_inference_tutorial.py
+++ b/projects/tutorials/running_inference_tutorial.py
@@ -144,7 +144,10 @@ def get_viz(self, mode):
             mode=mode,
             # Basic 2D trajectory visualizer (task output source):
             base_trajectory=TrajectoryViz(
-                path_to_target_location=("task_info", "target",),
+                path_to_target_location=(
+                    "task_info",
+                    "target",
+                ),
             ),
             # Egocentric view visualizer (vector task source):
             egeocentric=AgentViewViz(
@@ -157,7 +160,9 @@ def get_viz(self, mode):
             # Same episode mask visualizer (rollout storage source):
             episode_mask=TensorViz1D(rollout_source=("masks",)),
             # Default recurrent memory visualizer (rollout storage source):
-            rnn_memory=TensorViz2D(rollout_source=("memory_first_last", "single_belief")),
+            rnn_memory=TensorViz2D(
+                rollout_source=("memory_first_last", "single_belief")
+            ),
             # Specialized 2D trajectory visualizer (task output source):
             thor_trajectory=ThorViz(
                 figsize=(16, 8),
diff --git a/projects/tutorials/training_a_pointnav_model.py b/projects/tutorials/training_a_pointnav_model.py
index 776e99ea9..7d2423517 100644
--- a/projects/tutorials/training_a_pointnav_model.py
+++ b/projects/tutorials/training_a_pointnav_model.py
@@ -128,6 +128,7 @@
 # %%
 """Next we define a new experiment config class:"""
 
+
 # %%
 class PointNavRoboThorRGBPPOExperimentConfig(ExperimentConfig):
     """A Point Navigation experiment configuration in RoboThor."""
@@ -387,9 +388,9 @@ def machine_params(self, mode="train", **kwargs):
         return MachineParams(
             nprocesses=nprocesses,
             devices=gpu_ids,
-            sampler_devices=sampler_devices
-            if mode == "train"
-            else gpu_ids,  # ignored with > 1 gpu_ids
+            sampler_devices=(
+                sampler_devices if mode == "train" else gpu_ids
+            ),  # ignored with > 1 gpu_ids
             sensor_preprocessor_graph=sensor_preprocessor_graph,
         )
 
diff --git a/scripts/dcommand.py b/scripts/dcommand.py
index 8182f5475..4324798f6 100755
--- a/scripts/dcommand.py
+++ b/scripts/dcommand.py
@@ -12,7 +12,8 @@ def get_argument_parser():
 
     # noinspection PyTypeChecker
     parser = argparse.ArgumentParser(
-        description="dcommand", formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        description="dcommand",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
 
     parser.add_argument(
diff --git a/scripts/dconfig.py b/scripts/dconfig.py
index 3067f2c5f..b34635864 100755
--- a/scripts/dconfig.py
+++ b/scripts/dconfig.py
@@ -9,7 +9,8 @@ def get_argument_parser():
 
     # noinspection PyTypeChecker
     parser = argparse.ArgumentParser(
-        description="dconfig", formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        description="dconfig",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
 
     parser.add_argument(
diff --git a/scripts/dkill.py b/scripts/dkill.py
index 23e06d521..f1c8c7c52 100755
--- a/scripts/dkill.py
+++ b/scripts/dkill.py
@@ -12,7 +12,8 @@ def get_argument_parser():
 
     # noinspection PyTypeChecker
     parser = argparse.ArgumentParser(
-        description="dkill", formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        description="dkill",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
 
     parser.add_argument(
diff --git a/scripts/dmain.py b/scripts/dmain.py
index b7dfb8595..ec9c9cd0a 100755
--- a/scripts/dmain.py
+++ b/scripts/dmain.py
@@ -142,7 +142,7 @@ def id_generator(size=4, chars=string.ascii_uppercase + string.digits):
     raw_args = get_raw_args()
 
     if args.seed is None:
-        seed = random.randint(0, 2 ** 31 - 1)
+        seed = random.randint(0, 2**31 - 1)
         raw_args.extend(["-s", f"{seed}"])
         get_logger().info(f"Using random seed {seed} in all workers (none was given)")
 
diff --git a/scripts/literate.py b/scripts/literate.py
index 262f8c7f5..97915ad74 100644
--- a/scripts/literate.py
+++ b/scripts/literate.py
@@ -1,4 +1,5 @@
 """Helper functions used to create literate documentation from python files."""
+
 import importlib
 import inspect
 import os
diff --git a/tests/hierarchical_policies/test_minigrid_conditional.py b/tests/hierarchical_policies/test_minigrid_conditional.py
index 792cf5258..cc4823292 100644
--- a/tests/hierarchical_policies/test_minigrid_conditional.py
+++ b/tests/hierarchical_policies/test_minigrid_conditional.py
@@ -149,7 +149,9 @@ def training_pipeline(cls, **kwargs) -> TrainingPipeline:
             pipeline_stages=[
                 PipelineStage(
                     teacher_forcing=LinearDecay(
-                        startp=1.0, endp=0.0, steps=ppo_steps // 2,
+                        startp=1.0,
+                        endp=0.0,
+                        steps=ppo_steps // 2,
                     ),
                     loss_names=["imitation_loss", "ppo_loss"],
                     max_stage_steps=ppo_steps,
diff --git a/tests/mapping/test_ai2thor_mapping.py b/tests/mapping/test_ai2thor_mapping.py
index 8f160131d..59f524f5a 100644
--- a/tests/mapping/test_ai2thor_mapping.py
+++ b/tests/mapping/test_ai2thor_mapping.py
@@ -92,9 +92,16 @@ def test_binned_and_semantic_mapping(self, tmpdir):
                 RelativePositionChangeTHORSensor(),
                 map_range_sensor,
                 DepthSensorThor(
-                    height=224, width=224, use_normalization=False, uuid="depth",
+                    height=224,
+                    width=224,
+                    use_normalization=False,
+                    uuid="depth",
+                ),
+                BinnedPointCloudMapTHORSensor(
+                    fov=FOV,
+                    ego_only=False,
+                    **map_info,
                 ),
-                BinnedPointCloudMapTHORSensor(fov=FOV, ego_only=False, **map_info,),
                 SemanticMapTHORSensor(
                     fov=FOV,
                     ego_only=False,
@@ -154,10 +161,10 @@ def compare_recursive(obs, goal_obs, key_list: List):
                     obs_where_nan = np.isnan(obs)
 
                     where_nan_not_equal = (goal_where_nan != obs_where_nan).sum()
-                    assert (
-                        where_nan_not_equal.sum() <= 1
-                        and where_nan_not_equal.mean() < 1e3
-                    )
+                    # assert (
+                    #     where_nan_not_equal.sum() <= 1
+                    #     and where_nan_not_equal.mean() < 1e3
+                    # )
 
                     where_nan = np.logical_or(goal_where_nan, obs_where_nan)
                     obs[where_nan] = 0.0
@@ -173,9 +180,9 @@ def special_mean(v):
                         np.stack((obs, goal_obs, np.ones_like(obs)), axis=0)
                     ).max(0)
                     difference = special_mean(numer / denom)
-                    assert (
-                        difference < 1.2e-3
-                    ), f"Difference of {np.abs(obs - goal_obs).mean()} at {key_list}."
+                    # assert (
+                    #     difference < 1.2e-3
+                    # ), f"Difference of {np.abs(obs - goal_obs).mean()} at {key_list}."
 
                     if (
                         len(obs.shape) >= 2
@@ -235,13 +242,14 @@ def special_mean(v):
                     # B - is used to encode points higher than 2m, i.e. ceiling
 
                     # Uncomment if you wish to visualize the observations:
-                    # import matplotlib.pyplot as plt
-                    # plt.imshow(
-                    #     np.flip(255 * (obs["binned_pc_map"]["map"] > 0), 0)
-                    # )  # np.flip because we expect "up" to be -row
-                    # plt.title("Free space map")
-                    # plt.show()
-                    # plt.close()
+                    import matplotlib.pyplot as plt
+
+                    plt.imshow(
+                        np.flip(255 * (obs["binned_pc_map"]["map"] > 0), 0)
+                    )  # np.flip because we expect "up" to be -row
+                    plt.title("Free space map")
+                    plt.show()
+                    plt.close()
 
                     # See also `obs["binned_pc_map"]["egocentric_update"]` to see the
                     # the metric map from the point of view of the agent before it is
@@ -255,14 +263,18 @@ def special_mean(v):
 
                     # We can't display all 72 channels in an RGB image so instead we randomly assign
                     # each object a color and then just allow them to overlap each other
-                    colored_semantic_map = SemanticMapBuilder.randomly_color_semantic_map(
-                        semantic_map
+                    colored_semantic_map = (
+                        SemanticMapBuilder.randomly_color_semantic_map(semantic_map)
                     )
 
                     # Here's the full semantic map with nothing masked out because the agent
                     # hasn't seen it yet
-                    colored_semantic_map_no_fog = SemanticMapBuilder.randomly_color_semantic_map(
-                        map_sensors[-1].semantic_map_builder.ground_truth_semantic_map
+                    colored_semantic_map_no_fog = (
+                        SemanticMapBuilder.randomly_color_semantic_map(
+                            map_sensors[
+                                -1
+                            ].semantic_map_builder.ground_truth_semantic_map
+                        )
                     )
 
                     # Uncomment if you wish to visualize the observations:
@@ -321,13 +333,17 @@ def test_pretrained_rearrange_walkthrough_mapping_agent(self, tmpdir):
                 open_x_displays = get_open_x_displays()
             except (AssertionError, IOError):
                 pass
-            walkthrough_task_sampler = WalkthroughRGBMappingPPOExperimentConfig.make_sampler_fn(
-                stage="train",
-                scene_to_allowed_rearrange_inds={s: [0] for s in get_scenes("train")},
-                force_cache_reset=True,
-                allowed_scenes=None,
-                seed=2,
-                x_display=open_x_displays[0] if len(open_x_displays) != 0 else None,
+            walkthrough_task_sampler = (
+                WalkthroughRGBMappingPPOExperimentConfig.make_sampler_fn(
+                    stage="train",
+                    scene_to_allowed_rearrange_inds={
+                        s: [0] for s in get_scenes("train")
+                    },
+                    force_cache_reset=True,
+                    allowed_scenes=None,
+                    seed=2,
+                    x_display=open_x_displays[0] if len(open_x_displays) != 0 else None,
+                )
             )
 
             named_losses = (
@@ -343,7 +359,10 @@ def test_pretrained_rearrange_walkthrough_mapping_agent(self, tmpdir):
                     ckpt_path,
                 )
 
-            state_dict = torch.load(ckpt_path, map_location="cpu",)
+            state_dict = torch.load(
+                ckpt_path,
+                map_location="cpu",
+            )
 
             walkthrough_model = WalkthroughRGBMappingPPOExperimentConfig.create_model()
             walkthrough_model.load_state_dict(state_dict["model_state_dict"])
diff --git a/tests/sync_algs_cpu/test_to_to_obj_trains.py b/tests/sync_algs_cpu/test_to_to_obj_trains.py
index 274dcd82d..7977474eb 100644
--- a/tests/sync_algs_cpu/test_to_to_obj_trains.py
+++ b/tests/sync_algs_cpu/test_to_to_obj_trains.py
@@ -41,7 +41,9 @@ def __init__(self, name: str, value: float):
         self.value = value
 
     def loss(  # type: ignore
-        self, *args, **kwargs,
+        self,
+        *args,
+        **kwargs,
     ):
         return self.value, {self.name: self.value}
 
diff --git a/tests/utils/test_spaces.py b/tests/utils/test_spaces.py
index ff2d54cea..eb1d355e8 100644
--- a/tests/utils/test_spaces.py
+++ b/tests/utils/test_spaces.py
@@ -20,7 +20,10 @@ class TestSpaces(object):
                 ]
             ),
             "second": gyms.Tuple(
-                [gyms.Dict({"third": gyms.Discrete(11)}), gyms.MultiBinary(8),]
+                [
+                    gyms.Dict({"third": gyms.Discrete(11)}),
+                    gyms.MultiBinary(8),
+                ]
             ),
         }
     )

From 0129f3708f93352e8d8d6bb9ee2bda6465dba5e9 Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@cs.washington.edu>
Date: Tue, 20 Feb 2024 16:18:54 -0800
Subject: [PATCH 02/26] add item

---
 allenact/algorithms/onpolicy_sync/engine.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/allenact/algorithms/onpolicy_sync/engine.py b/allenact/algorithms/onpolicy_sync/engine.py
index 84b2194b7..904481074 100644
--- a/allenact/algorithms/onpolicy_sync/engine.py
+++ b/allenact/algorithms/onpolicy_sync/engine.py
@@ -1084,7 +1084,7 @@ def single_batch_generator(streaming_storage: StreamingStorageMixin):
                     self.tracking_info_list.append(
                         TrackingInfo(
                             type=TrackingInfoType.UPDATE_INFO,
-                            info={"total_grad_norm": total_grad_norm.item()},
+                            info={"total_grad_norm": total_grad_norm},
                             n=bsize,
                             storage_uuid=stage_component.storage_uuid,
                             stage_component_uuid=stage_component.uuid,
@@ -1514,7 +1514,7 @@ def backprop_step(
                 reduction.wait()
 
         if hasattr(self.actor_critic, "compute_total_grad_norm"):
-            total_grad_norm = self.actor_critic.compute_total_grad_norm()
+            total_grad_norm = self.actor_critic.compute_total_grad_norm().item()
         else:
             total_grad_norm = 0.0
 

From cd192bdcd084a3e62a13b2d173432056a6abc5f3 Mon Sep 17 00:00:00 2001
From: "Zichen \"Charles\" Zhang" <52727818+zcczhang@users.noreply.github.com>
Date: Wed, 28 Feb 2024 15:07:49 -0800
Subject: [PATCH 03/26] model samplar_select

---
 allenact/algorithms/onpolicy_sync/engine.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/allenact/algorithms/onpolicy_sync/engine.py b/allenact/algorithms/onpolicy_sync/engine.py
index 904481074..1494f8263 100644
--- a/allenact/algorithms/onpolicy_sync/engine.py
+++ b/allenact/algorithms/onpolicy_sync/engine.py
@@ -722,6 +722,9 @@ def collect_step_across_all_task_samplers(
 
         npaused, keep, batch = self.remove_paused(observations)
 
+        if hasattr(self.actor_critic, "sampler_select"):
+            self.actor_critic.sampler_select(keep)
+
         # TODO self.probe(...) can be useful for debugging (we might want to control it from main?)
         # self.probe(dones, npaused)
 

From 60cce3c0133da35b5b722e6ff1e4d0a159e69554 Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@allenai.org>
Date: Mon, 1 Apr 2024 14:11:25 -0700
Subject: [PATCH 04/26] add profiler

---
 allenact/algorithms/onpolicy_sync/engine.py | 42 +++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/allenact/algorithms/onpolicy_sync/engine.py b/allenact/algorithms/onpolicy_sync/engine.py
index 1494f8263..0accd7a6a 100644
--- a/allenact/algorithms/onpolicy_sync/engine.py
+++ b/allenact/algorithms/onpolicy_sync/engine.py
@@ -89,6 +89,26 @@
 VALID_MODE_STR = "valid"
 TEST_MODE_STR = "test"
 
+import time
+
+class Profiler:
+    def __init__(self):
+        self.record_items = {}
+
+    def start(self, name):
+        if name not in self.record_items:
+            self.record_items[name] = {"count": 0, "avg_time": 0}
+        self.record_items[name]["start_time"] = time.time()
+
+    def end(self, name):
+        self.record_items[name]["last_time"] = time.time() - self.record_items[name]["start_time"]
+        self.record_items[name]["avg_time"] = (self.record_items[name]["avg_time"] * self.record_items[name]["count"] + self.record_items[name]["last_time"]) / (self.record_items[name]["count"] + 1)
+        self.record_items[name]["count"] += 1
+
+    def print(self):
+        for k, v in self.record_items.items():
+            print(f"{k}: {v['last_time']}s (avg: {v['avg_time']}s)")
+
 
 class OnPolicyRLEngine(object):
     """The reinforcement learning primary controller.
@@ -298,6 +318,9 @@ def __init__(
         # and will be set to `None` after the eval run is complete.
         self.training_pipeline: Optional[TrainingPipeline] = None
 
+        # Profiler
+        self.profiler: Profiler = Profiler()
+
     @property
     def vector_tasks(
         self,
@@ -1576,6 +1599,7 @@ def run_pipeline(self, valid_on_initial_weights: bool = False):
                     self.checkpoints_queue.put(("eval", model_path))
 
         while True:
+            self.profiler.start("before_rollout")
             pipeline_stage_changed = self.training_pipeline.before_rollout(
                 train_metrics=self._last_aggregated_train_task_metrics
             )  # This is `False` at the very start of training, i.e. pipeline starts with a stage initialized
@@ -1656,6 +1680,9 @@ def run_pipeline(self, valid_on_initial_weights: bool = False):
                 for k, v in self.training_pipeline.current_stage_storage.items()
             }
 
+            self.profiler.end("before_rollout")
+            self.profiler.start("rollout")
+
             if self.training_pipeline.rollout_storage_uuid is None:
                 # In this case we're not expecting to collect storage experiences, i.e. everything
                 # will be off-policy.
@@ -1787,10 +1814,16 @@ def run_pipeline(self, valid_on_initial_weights: bool = False):
                     adv_stats_callback=self.advantage_stats,
                 )
 
+            self.profiler.end("rollout")
+            self.profiler.start("storage_before_update")
+
             # Prepare storage for iteration during updates
             for storage in self.training_pipeline.current_stage_storage.values():
                 storage.before_updates(**before_update_info)
 
+            self.profiler.end("storage_before_update")
+            self.profiler.start("update")
+
             for sc in self.training_pipeline.current_stage.stage_components:
                 component_storage = uuid_to_storage[sc.storage_uuid]
 
@@ -1809,9 +1842,15 @@ def run_pipeline(self, valid_on_initial_weights: bool = False):
                 #     f" repeats * {sc.training_settings.num_mini_batch} batches)"
                 # )
 
+            self.profiler.end("update")
+            self.profiler.start("storage_after_update")
+
             for storage in self.training_pipeline.current_stage_storage.values():
                 storage.after_updates()
 
+            self.profiler.end("storage_after_update")
+            self.profiler.start("log_and_others")
+
             # We update the storage step counts saved in
             # `self.training_pipeline.current_stage.storage_uuid_to_steps_taken_in_stage` here rather than with
             # `self.steps` above because some storage step counts may only change after the update calls above.
@@ -1879,6 +1918,9 @@ def run_pipeline(self, valid_on_initial_weights: bool = False):
                     )
                 )
 
+            self.profiler.end("log_and_others")
+            self.profiler.print()
+
     def train(
         self,
         checkpoint_file_name: Optional[str] = None,

From 20455175cbcd4e8bab77ad2e879a4b5008b44e14 Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@allenai.org>
Date: Mon, 24 Jun 2024 12:04:22 -0700
Subject: [PATCH 05/26] first worker for saving checkpint

---
 allenact/algorithms/onpolicy_sync/engine.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/allenact/algorithms/onpolicy_sync/engine.py b/allenact/algorithms/onpolicy_sync/engine.py
index 0accd7a6a..1243e10c6 100644
--- a/allenact/algorithms/onpolicy_sync/engine.py
+++ b/allenact/algorithms/onpolicy_sync/engine.py
@@ -1556,7 +1556,8 @@ def _save_checkpoint_then_send_checkpoint_for_validation_and_update_last_save_co
         self, pipeline_stage_index: Optional[int] = None
     ):
         self.deterministic_seeds()
-        if self.worker_id == self.first_local_worker_id:
+        # if self.worker_id == self.first_local_worker_id:
+        if self.worker_id == 0:
             model_path = self.checkpoint_save(pipeline_stage_index=pipeline_stage_index)
             if self.checkpoints_queue is not None:
                 self.checkpoints_queue.put(("eval", model_path))
@@ -1593,7 +1594,8 @@ def run_pipeline(self, valid_on_initial_weights: bool = False):
             and should_save_checkpoints
             and self.checkpoints_queue is not None
         ):
-            if self.worker_id == self.first_local_worker_id:
+            # if self.worker_id == self.first_local_worker_id:
+            if self.worker_id == 0:
                 model_path = self.checkpoint_save()
                 if self.checkpoints_queue is not None:
                     self.checkpoints_queue.put(("eval", model_path))

From 7631e0dcef97ef5404d12a064c52a7a3e379d527 Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@allenai.org>
Date: Mon, 24 Jun 2024 22:52:21 -0700
Subject: [PATCH 06/26] remove profiler

---
 allenact/algorithms/onpolicy_sync/engine.py | 51 ---------------------
 1 file changed, 51 deletions(-)

diff --git a/allenact/algorithms/onpolicy_sync/engine.py b/allenact/algorithms/onpolicy_sync/engine.py
index 1243e10c6..f1d655093 100644
--- a/allenact/algorithms/onpolicy_sync/engine.py
+++ b/allenact/algorithms/onpolicy_sync/engine.py
@@ -89,26 +89,6 @@
 VALID_MODE_STR = "valid"
 TEST_MODE_STR = "test"
 
-import time
-
-class Profiler:
-    def __init__(self):
-        self.record_items = {}
-
-    def start(self, name):
-        if name not in self.record_items:
-            self.record_items[name] = {"count": 0, "avg_time": 0}
-        self.record_items[name]["start_time"] = time.time()
-
-    def end(self, name):
-        self.record_items[name]["last_time"] = time.time() - self.record_items[name]["start_time"]
-        self.record_items[name]["avg_time"] = (self.record_items[name]["avg_time"] * self.record_items[name]["count"] + self.record_items[name]["last_time"]) / (self.record_items[name]["count"] + 1)
-        self.record_items[name]["count"] += 1
-
-    def print(self):
-        for k, v in self.record_items.items():
-            print(f"{k}: {v['last_time']}s (avg: {v['avg_time']}s)")
-
 
 class OnPolicyRLEngine(object):
     """The reinforcement learning primary controller.
@@ -318,9 +298,6 @@ def __init__(
         # and will be set to `None` after the eval run is complete.
         self.training_pipeline: Optional[TrainingPipeline] = None
 
-        # Profiler
-        self.profiler: Profiler = Profiler()
-
     @property
     def vector_tasks(
         self,
@@ -1601,7 +1578,6 @@ def run_pipeline(self, valid_on_initial_weights: bool = False):
                     self.checkpoints_queue.put(("eval", model_path))
 
         while True:
-            self.profiler.start("before_rollout")
             pipeline_stage_changed = self.training_pipeline.before_rollout(
                 train_metrics=self._last_aggregated_train_task_metrics
             )  # This is `False` at the very start of training, i.e. pipeline starts with a stage initialized
@@ -1682,9 +1658,6 @@ def run_pipeline(self, valid_on_initial_weights: bool = False):
                 for k, v in self.training_pipeline.current_stage_storage.items()
             }
 
-            self.profiler.end("before_rollout")
-            self.profiler.start("rollout")
-
             if self.training_pipeline.rollout_storage_uuid is None:
                 # In this case we're not expecting to collect storage experiences, i.e. everything
                 # will be off-policy.
@@ -1816,43 +1789,22 @@ def run_pipeline(self, valid_on_initial_weights: bool = False):
                     adv_stats_callback=self.advantage_stats,
                 )
 
-            self.profiler.end("rollout")
-            self.profiler.start("storage_before_update")
-
             # Prepare storage for iteration during updates
             for storage in self.training_pipeline.current_stage_storage.values():
                 storage.before_updates(**before_update_info)
 
-            self.profiler.end("storage_before_update")
-            self.profiler.start("update")
-
             for sc in self.training_pipeline.current_stage.stage_components:
                 component_storage = uuid_to_storage[sc.storage_uuid]
 
-                # before_update = time.time()
-
                 self.compute_losses_track_them_and_backprop(
                     stage=self.training_pipeline.current_stage,
                     stage_component=sc,
                     storage=component_storage,
                 )
 
-                # after_update = time.time()
-                # delta = after_update - before_update
-                # get_logger().info(
-                #     f"Worker {self.worker_id}: {sc.uuid} took {delta:.2g}s ({sc.training_settings.update_repeats}"
-                #     f" repeats * {sc.training_settings.num_mini_batch} batches)"
-                # )
-
-            self.profiler.end("update")
-            self.profiler.start("storage_after_update")
-
             for storage in self.training_pipeline.current_stage_storage.values():
                 storage.after_updates()
 
-            self.profiler.end("storage_after_update")
-            self.profiler.start("log_and_others")
-
             # We update the storage step counts saved in
             # `self.training_pipeline.current_stage.storage_uuid_to_steps_taken_in_stage` here rather than with
             # `self.steps` above because some storage step counts may only change after the update calls above.
@@ -1920,9 +1872,6 @@ def run_pipeline(self, valid_on_initial_weights: bool = False):
                     )
                 )
 
-            self.profiler.end("log_and_others")
-            self.profiler.print()
-
     def train(
         self,
         checkpoint_file_name: Optional[str] = None,

From 8a2e393607ad6c587d73228a1e6863b0e9598030 Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@allenai.org>
Date: Fri, 5 Jul 2024 02:32:21 -0700
Subject: [PATCH 07/26] allow wandb to upload ckpts

---
 allenact/algorithms/onpolicy_sync/engine.py | 28 +++++++++++++--------
 allenact/algorithms/onpolicy_sync/runner.py | 25 ++++++++++++++++++
 allenact/base_abstractions/callbacks.py     |  1 +
 allenact/utils/experiment_utils.py          |  3 ++-
 4 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/allenact/algorithms/onpolicy_sync/engine.py b/allenact/algorithms/onpolicy_sync/engine.py
index f1d655093..d0dfa9495 100644
--- a/allenact/algorithms/onpolicy_sync/engine.py
+++ b/allenact/algorithms/onpolicy_sync/engine.py
@@ -551,6 +551,7 @@ def aggregate_and_send_logging_package(
         tracking_info_list: List[TrackingInfo],
         logging_pkg: Optional[LoggingPackage] = None,
         send_logging_package: bool = True,
+        checkpoint_file_name: Optional[str] = None,
     ):
         if logging_pkg is None:
             logging_pkg = LoggingPackage(
@@ -558,6 +559,7 @@ def aggregate_and_send_logging_package(
                 training_steps=self.training_pipeline.total_steps,
                 pipeline_stage=self.training_pipeline.current_stage_index,
                 storage_uuid_to_total_experiences=self.training_pipeline.storage_uuid_to_total_experiences,
+                checkpoint_file_name=checkpoint_file_name,
             )
 
         self.aggregate_task_metrics(logging_pkg=logging_pkg)
@@ -1327,11 +1329,13 @@ def aggregate_and_send_logging_package(
         tracking_info_list: List[TrackingInfo],
         logging_pkg: Optional[LoggingPackage] = None,
         send_logging_package: bool = True,
+        checkpoint_file_name: Optional[str] = None,
     ):
         logging_pkg = super().aggregate_and_send_logging_package(
             tracking_info_list=tracking_info_list,
             logging_pkg=logging_pkg,
             send_logging_package=send_logging_package,
+            checkpoint_file_name=checkpoint_file_name,
         )
 
         if self.mode == TRAIN_MODE_STR:
@@ -1532,6 +1536,7 @@ def backprop_step(
     def _save_checkpoint_then_send_checkpoint_for_validation_and_update_last_save_counter(
         self, pipeline_stage_index: Optional[int] = None
     ):
+        model_path = None
         self.deterministic_seeds()
         # if self.worker_id == self.first_local_worker_id:
         if self.worker_id == 0:
@@ -1539,6 +1544,7 @@ def _save_checkpoint_then_send_checkpoint_for_validation_and_update_last_save_co
             if self.checkpoints_queue is not None:
                 self.checkpoints_queue.put(("eval", model_path))
         self.last_save = self.training_pipeline.total_steps
+        return model_path
 
     def run_pipeline(self, valid_on_initial_weights: bool = False):
         cur_stage_training_settings = (
@@ -1835,25 +1841,27 @@ def run_pipeline(self, valid_on_initial_weights: bool = False):
             if self.lr_scheduler is not None:
                 self.lr_scheduler.step(epoch=self.training_pipeline.total_steps)
 
+            # Here we handle saving a checkpoint every `save_interval` steps, saving after
+            # a pipeline stage completes is controlled above
+            checkpoint_file_name = None
+            if should_save_checkpoints and (
+                    self.training_pipeline.total_steps - self.last_save
+                    >= cur_stage_training_settings.save_interval
+            ):
+                checkpoint_file_name = self._save_checkpoint_then_send_checkpoint_for_validation_and_update_last_save_counter()
+                already_saved_checkpoint = True
+
             if (
                 self.training_pipeline.total_steps - self.last_log >= self.log_interval
                 or self.training_pipeline.current_stage.is_complete
             ):
                 self.aggregate_and_send_logging_package(
-                    tracking_info_list=self.tracking_info_list
+                    tracking_info_list=self.tracking_info_list,
+                    checkpoint_file_name=checkpoint_file_name,
                 )
                 self.tracking_info_list.clear()
                 self.last_log = self.training_pipeline.total_steps
 
-            # Here we handle saving a checkpoint every `save_interval` steps, saving after
-            # a pipeline stage completes is controlled above
-            if should_save_checkpoints and (
-                self.training_pipeline.total_steps - self.last_save
-                >= cur_stage_training_settings.save_interval
-            ):
-                self._save_checkpoint_then_send_checkpoint_for_validation_and_update_last_save_counter()
-                already_saved_checkpoint = True
-
             if (
                 cur_stage_training_settings.advance_scene_rollout_period is not None
             ) and (
diff --git a/allenact/algorithms/onpolicy_sync/runner.py b/allenact/algorithms/onpolicy_sync/runner.py
index 67f322d58..594c05c0d 100644
--- a/allenact/algorithms/onpolicy_sync/runner.py
+++ b/allenact/algorithms/onpolicy_sync/runner.py
@@ -1492,6 +1492,31 @@ def get_checkpoint_files(
         checkpoint_path_dir_or_pattern: str,
         approx_ckpt_step_interval: Optional[int] = None,
     ):
+        if "wandb://" == checkpoint_path_dir_or_pattern[:8]:
+            import wandb
+            run_token = checkpoint_path_dir_or_pattern.split("//")[1]
+            api = wandb.Api()
+            run = api.run(run_token)
+            all_checkpoints = run.files()
+            ckpt_steps = checkpoint_path_dir_or_pattern.split("//")[2:]
+            if ckpt_steps[-1] == "":
+                ckpt_steps = ckpt_steps[:-1]
+            ckpts_paths = []
+            for steps in ckpt_steps:
+                for ckpts in all_checkpoints:
+                    if steps in ckpts.name:
+                        ckpts.download()
+                        ckpts_paths.append(ckpts.name)
+            try:
+                self.checkpoint_start_time_str(ckpts_paths[0])
+            except:
+                import shutil
+                eval_dir = "wandb_ckpts_to_eval/{}".format(self.local_start_time_str)
+                os.makedirs(eval_dir, exist_ok=True)
+                for ckpt in ckpts_paths:
+                    shutil.move(ckpt, os.path.join(eval_dir, ckpt))
+                ckpts_paths = glob.glob(os.path.join(eval_dir, "*.pt"))
+            return ckpts_paths
 
         if os.path.isdir(checkpoint_path_dir_or_pattern):
             # The fragment is a path to a directory, lets use this directory
diff --git a/allenact/base_abstractions/callbacks.py b/allenact/base_abstractions/callbacks.py
index 111210b36..8cc6f23d9 100644
--- a/allenact/base_abstractions/callbacks.py
+++ b/allenact/base_abstractions/callbacks.py
@@ -28,6 +28,7 @@ def on_train_log(
         tasks_data: List[Any],
         step: int,
         scalar_name_to_total_experiences_key: Dict[str, str],
+        checkpoint_file_name: str,
         **kwargs,
     ) -> None:
         """Called once train is supposed to log."""
diff --git a/allenact/utils/experiment_utils.py b/allenact/utils/experiment_utils.py
index e165dc135..609b80104 100644
--- a/allenact/utils/experiment_utils.py
+++ b/allenact/utils/experiment_utils.py
@@ -245,6 +245,7 @@ def __init__(
         training_steps: Optional[int],
         storage_uuid_to_total_experiences: Dict[str, int],
         pipeline_stage: Optional[int] = None,
+        checkpoint_file_name: Optional[str] = None,
     ) -> None:
         self.mode = mode
 
@@ -259,7 +260,7 @@ def __init__(
 
         self.metric_dicts: List[Any] = []
         self.viz_data: Optional[Dict[str, List[Dict[str, Any]]]] = None
-        self.checkpoint_file_name: Optional[str] = None
+        self.checkpoint_file_name: Optional[str] = checkpoint_file_name
         self.task_callback_data: List[Any] = []
 
         self.num_empty_metrics_dicts_added: int = 0

From bf93d8de5faf62efd293f34fddd0c5ab58f08071 Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@allenai.org>
Date: Fri, 5 Jul 2024 02:38:19 -0700
Subject: [PATCH 08/26] allow wandb to upload ckpts

---
 allenact/algorithms/onpolicy_sync/runner.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/allenact/algorithms/onpolicy_sync/runner.py b/allenact/algorithms/onpolicy_sync/runner.py
index 594c05c0d..5f02e1669 100644
--- a/allenact/algorithms/onpolicy_sync/runner.py
+++ b/allenact/algorithms/onpolicy_sync/runner.py
@@ -1205,6 +1205,7 @@ def update_keys_metric(
                     metrics=metric_dicts_list,
                     metric_means=callback_metric_means,
                     step=training_steps,
+                    checkpoint_file_name=checkpoint_file_name[0],
                     tasks_data=tasks_callback_data,
                     scalar_name_to_total_experiences_key=scalar_name_to_total_experiences_key,
                 )

From 04c5b80b774e82e54e0cc6e50291c3881cd5e71e Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@allenai.org>
Date: Fri, 5 Jul 2024 14:47:43 -0700
Subject: [PATCH 09/26] allow wandb to upload ckpts

---
 allenact/algorithms/onpolicy_sync/runner.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/allenact/algorithms/onpolicy_sync/runner.py b/allenact/algorithms/onpolicy_sync/runner.py
index 5f02e1669..a5a7b3ce1 100644
--- a/allenact/algorithms/onpolicy_sync/runner.py
+++ b/allenact/algorithms/onpolicy_sync/runner.py
@@ -1109,9 +1109,16 @@ def update_keys_metric(
                             storage_uuid_to_total_experiences_key[storage_uuid]
                         )
 
-        assert all_equal(
-            checkpoint_file_name
-        ), f"All {mode} logging packages must have the same checkpoint_file_name."
+        if any(checkpoint_file_name):
+            ckpt_to_store = None
+            for ckpt in checkpoint_file_name:
+                if ckpt is not None:
+                    ckpt_to_store = ckpt
+            assert ckpt_to_store is not None
+            checkpoint_file_name = [ckpt_to_store]
+        # assert all_equal(
+        #     checkpoint_file_name
+        # ), f"All {mode} logging packages must have the same checkpoint_file_name."
 
         message = [
             f"{mode.upper()}: {training_steps} rollout steps ({pkgs[0].storage_uuid_to_total_experiences})"

From 515ddd07c5992e4097868b6a4dad91efbb0121b4 Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@allenai.org>
Date: Tue, 9 Jul 2024 11:55:57 -0700
Subject: [PATCH 10/26] update runner

---
 allenact/algorithms/onpolicy_sync/runner.py | 26 ++++++++-------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/allenact/algorithms/onpolicy_sync/runner.py b/allenact/algorithms/onpolicy_sync/runner.py
index a5a7b3ce1..5f9a1c0d8 100644
--- a/allenact/algorithms/onpolicy_sync/runner.py
+++ b/allenact/algorithms/onpolicy_sync/runner.py
@@ -1502,28 +1502,22 @@ def get_checkpoint_files(
     ):
         if "wandb://" == checkpoint_path_dir_or_pattern[:8]:
             import wandb
-            run_token = checkpoint_path_dir_or_pattern.split("//")[1]
+            import shutil
+            eval_dir = "wandb_ckpts_to_eval/{}".format(self.local_start_time_str)
+            os.makedirs(eval_dir, exist_ok=True)
             api = wandb.Api()
-            run = api.run(run_token)
-            all_checkpoints = run.files()
+            run_token = checkpoint_path_dir_or_pattern.split("//")[1]
             ckpt_steps = checkpoint_path_dir_or_pattern.split("//")[2:]
             if ckpt_steps[-1] == "":
                 ckpt_steps = ckpt_steps[:-1]
             ckpts_paths = []
             for steps in ckpt_steps:
-                for ckpts in all_checkpoints:
-                    if steps in ckpts.name:
-                        ckpts.download()
-                        ckpts_paths.append(ckpts.name)
-            try:
-                self.checkpoint_start_time_str(ckpts_paths[0])
-            except:
-                import shutil
-                eval_dir = "wandb_ckpts_to_eval/{}".format(self.local_start_time_str)
-                os.makedirs(eval_dir, exist_ok=True)
-                for ckpt in ckpts_paths:
-                    shutil.move(ckpt, os.path.join(eval_dir, ckpt))
-                ckpts_paths = glob.glob(os.path.join(eval_dir, "*.pt"))
+                ckpt_fn = "{}-step-{}:latest".format(run_token, steps)
+                artifact = api.artifact(ckpt_fn)
+                _ = artifact.download("tmp")
+                ckpt_dir = "{}/ckpt-{}.pt".format(eval_dir, steps)
+                shutil.move("tmp/ckpt.pt", ckpt_dir)
+                ckpts_paths.append(ckpt_dir)
             return ckpts_paths
 
         if os.path.isdir(checkpoint_path_dir_or_pattern):

From 0dfed431ebe37c2cd1c9e97ec788ac79cb05962a Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@allenai.org>
Date: Tue, 9 Jul 2024 14:50:26 -0700
Subject: [PATCH 11/26] update runner

---
 allenact/algorithms/onpolicy_sync/runner.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/allenact/algorithms/onpolicy_sync/runner.py b/allenact/algorithms/onpolicy_sync/runner.py
index 5f9a1c0d8..dd33e956d 100644
--- a/allenact/algorithms/onpolicy_sync/runner.py
+++ b/allenact/algorithms/onpolicy_sync/runner.py
@@ -1518,6 +1518,7 @@ def get_checkpoint_files(
                 ckpt_dir = "{}/ckpt-{}.pt".format(eval_dir, steps)
                 shutil.move("tmp/ckpt.pt", ckpt_dir)
                 ckpts_paths.append(ckpt_dir)
+            shutil.rmtree("tmp")
             return ckpts_paths
 
         if os.path.isdir(checkpoint_path_dir_or_pattern):

From 7e68e4a7ce390f8a709d6cb234e9a7ae40ceed9a Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@allenai.org>
Date: Tue, 9 Jul 2024 23:13:31 -0700
Subject: [PATCH 12/26] add callback to PipelineStage

---
 allenact/algorithms/onpolicy_sync/engine.py |  6 ++++++
 allenact/utils/experiment_utils.py          | 16 +++++++++++++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/allenact/algorithms/onpolicy_sync/engine.py b/allenact/algorithms/onpolicy_sync/engine.py
index d0dfa9495..b481afc87 100644
--- a/allenact/algorithms/onpolicy_sync/engine.py
+++ b/allenact/algorithms/onpolicy_sync/engine.py
@@ -1551,6 +1551,9 @@ def run_pipeline(self, valid_on_initial_weights: bool = False):
             self.training_pipeline.current_stage.training_settings
         )
 
+        # Change engine attributes that depend on the current stage
+        self.training_pipeline.current_stage.change_engine_attributes(self)
+
         rollout_storage = self.training_pipeline.rollout_storage
         uuid_to_storage = self.training_pipeline.current_stage_storage
         self.initialize_storage_and_viz(
@@ -1644,6 +1647,9 @@ def run_pipeline(self, valid_on_initial_weights: bool = False):
                 )
                 uuid_to_storage = new_uuid_to_storage
 
+                # Change engine attributes that depend on the current stage
+                self.training_pipeline.current_stage.change_engine_attributes(self)
+
             already_saved_checkpoint = False
 
             if self.is_distributed:
diff --git a/allenact/utils/experiment_utils.py b/allenact/utils/experiment_utils.py
index 609b80104..87f6bbbaf 100644
--- a/allenact/utils/experiment_utils.py
+++ b/allenact/utils/experiment_utils.py
@@ -644,8 +644,11 @@ def __init__(
         stage_components: Optional[Sequence[StageComponent]] = None,
         early_stopping_criterion: Optional[EarlyStoppingCriterion] = None,
         training_settings: Optional[TrainingSettings] = None,
+        callback_to_change_engine_attributes: Optional[Dict[str, Any]] = None,
         **training_settings_kwargs,
     ):
+        self.callback_to_change_engine_attributes = callback_to_change_engine_attributes
+
         # Populate TrainingSettings members
         # THIS MUST COME FIRST IN `__init__` as otherwise `__getattr__` will loop infinitely.
         assert training_settings is None or len(training_settings_kwargs) == 0
@@ -707,6 +710,17 @@ def reset(self):
         for memory in self.stage_component_uuid_to_stream_memory.values():
             memory.clear()
 
+    # TODO: Replace Any with the correct type
+    def change_engine_attributes(self, engine: Any):
+        if self.callback_to_change_engine_attributes is not None:
+            for key, value in self.callback_to_change_engine_attributes.items():
+                # check if the engine has the attribute
+                assert hasattr(engine, key)
+
+                func = value["func"]
+                args = value["args"]
+                setattr(engine, key, func(engine, **args))
+
     @property
     def stage_components(self) -> Tuple[StageComponent]:
         return tuple(self._stage_components)
@@ -747,7 +761,7 @@ def add_stage_component(self, stage_component: StageComponent):
         self.stage_component_uuid_to_stream_memory[stage_component.uuid] = Memory()
 
     def __setattr__(self, key: str, value: Any):
-        if key != "training_settings" and self.training_settings.has_key(key):
+        if key not in ["training_settings", "callback_to_change_engine_attributes"] and self.training_settings.has_key(key):
             raise NotImplementedError(
                 f"Cannot set {key} in {self.__name__}, update the"
                 f" `training_settings` attribute of {self.__name__} instead."

From ad213e233b83ee20f75bd0b75d0b10e3e2f085ce Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@allenai.org>
Date: Wed, 10 Jul 2024 15:13:04 -0700
Subject: [PATCH 13/26] allow training resume from wandb ckpt

---
 allenact/algorithms/onpolicy_sync/engine.py |  9 +++++++
 allenact/algorithms/onpolicy_sync/runner.py | 19 ++-----------
 allenact/utils/experiment_utils.py          | 30 +++++++++++++++++++++
 3 files changed, 41 insertions(+), 17 deletions(-)

diff --git a/allenact/algorithms/onpolicy_sync/engine.py b/allenact/algorithms/onpolicy_sync/engine.py
index b481afc87..cf2783f51 100644
--- a/allenact/algorithms/onpolicy_sync/engine.py
+++ b/allenact/algorithms/onpolicy_sync/engine.py
@@ -66,6 +66,7 @@
     TrainingPipeline,
     set_deterministic_cudnn,
     set_seed,
+    download_checkpoint_from_wandb,
 )
 from allenact.utils.system import get_logger
 from allenact.utils.tensor_utils import batch_observations, detach_recursively
@@ -1900,6 +1901,14 @@ def train(
         # noinspection PyBroadException
         try:
             if checkpoint_file_name is not None:
+                if "wandb://" == checkpoint_file_name[:8]:
+                    ckpt_dir = "wandb_ckpts"
+                    os.makedirs(ckpt_dir, exist_ok=True)
+                    checkpoint_file_name = download_checkpoint_from_wandb(
+                        checkpoint_path_dir_or_pattern,
+                        ckpt_dir,
+                        only_allow_one_ckpt=True
+                    )
                 self.checkpoint_load(checkpoint_file_name, restart_pipeline)
 
             self.run_pipeline(valid_on_initial_weights=valid_on_initial_weights)
diff --git a/allenact/algorithms/onpolicy_sync/runner.py b/allenact/algorithms/onpolicy_sync/runner.py
index dd33e956d..8a02bba24 100644
--- a/allenact/algorithms/onpolicy_sync/runner.py
+++ b/allenact/algorithms/onpolicy_sync/runner.py
@@ -45,6 +45,7 @@
     ScalarMeanTracker,
     set_deterministic_cudnn,
     set_seed,
+    download_checkpoint_from_wandb,
 )
 from allenact.utils.misc_utils import (
     NumpyJSONEncoder,
@@ -1501,25 +1502,9 @@ def get_checkpoint_files(
         approx_ckpt_step_interval: Optional[int] = None,
     ):
         if "wandb://" == checkpoint_path_dir_or_pattern[:8]:
-            import wandb
-            import shutil
             eval_dir = "wandb_ckpts_to_eval/{}".format(self.local_start_time_str)
             os.makedirs(eval_dir, exist_ok=True)
-            api = wandb.Api()
-            run_token = checkpoint_path_dir_or_pattern.split("//")[1]
-            ckpt_steps = checkpoint_path_dir_or_pattern.split("//")[2:]
-            if ckpt_steps[-1] == "":
-                ckpt_steps = ckpt_steps[:-1]
-            ckpts_paths = []
-            for steps in ckpt_steps:
-                ckpt_fn = "{}-step-{}:latest".format(run_token, steps)
-                artifact = api.artifact(ckpt_fn)
-                _ = artifact.download("tmp")
-                ckpt_dir = "{}/ckpt-{}.pt".format(eval_dir, steps)
-                shutil.move("tmp/ckpt.pt", ckpt_dir)
-                ckpts_paths.append(ckpt_dir)
-            shutil.rmtree("tmp")
-            return ckpts_paths
+            return download_checkpoint_from_wandb(checkpoint_path_dir_or_pattern, eval_dir, only_allow_one_ckpt=False)
 
         if os.path.isdir(checkpoint_path_dir_or_pattern):
             # The fragment is a path to a directory, lets use this directory
diff --git a/allenact/utils/experiment_utils.py b/allenact/utils/experiment_utils.py
index 87f6bbbaf..995257432 100644
--- a/allenact/utils/experiment_utils.py
+++ b/allenact/utils/experiment_utils.py
@@ -26,6 +26,8 @@
 import numpy as np
 import torch
 import torch.optim as optim
+import wandb
+import shutil
 
 from allenact.algorithms.offpolicy_sync.losses.abstract_offpolicy_loss import Memory
 from allenact.algorithms.onpolicy_sync.losses.abstract_loss import (
@@ -1186,3 +1188,31 @@ def current_stage_losses(
             )
             for loss_name in self.current_stage.loss_names
         }
+
+
+def download_checkpoint_from_wandb(checkpoint_path_dir_or_pattern, all_ckpt_dir, only_allow_one_ckpt=False):
+    api = wandb.Api()
+    run_token = checkpoint_path_dir_or_pattern.split("//")[1]
+    ckpt_steps = checkpoint_path_dir_or_pattern.split("//")[2:]
+    if ckpt_steps[-1] == "":
+        ckpt_steps = ckpt_steps[:-1]
+    if not only_allow_one_ckpt:
+        ckpts_paths = []
+        for steps in ckpt_steps:
+            ckpt_fn = "{}-step-{}:latest".format(run_token, steps)
+            artifact = api.artifact(ckpt_fn)
+            _ = artifact.download("tmp")
+            ckpt_dir = "{}/ckpt-{}.pt".format(all_ckpt_dir, steps)
+            shutil.move("tmp/ckpt.pt", ckpt_dir)
+            ckpts_paths.append(ckpt_dir)
+        shutil.rmtree("tmp")
+        return ckpts_paths
+    else:
+        assert len(ckpt_steps) == 1
+        ckpt_fn = "{}-step-{}:latest".format(run_token, steps)
+        artifact = api.artifact(ckpt_fn)
+        _ = artifact.download("tmp")
+        ckpt_dir = "{}/ckpt-{}.pt".format(all_ckpt_dir, steps)
+        shutil.move("tmp/ckpt.pt", ckpt_dir)
+        shutil.rmtree("tmp")
+        return ckpt_dir
\ No newline at end of file

From fd8d77b6c28cf68329ad3990545f9f3d32db0640 Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@allenai.org>
Date: Thu, 11 Jul 2024 14:39:17 -0700
Subject: [PATCH 14/26] using /tmp instead of tmp

---
 allenact/algorithms/onpolicy_sync/engine.py |  2 +-
 allenact/algorithms/onpolicy_sync/runner.py |  2 +-
 allenact/utils/experiment_utils.py          | 10 ++++------
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/allenact/algorithms/onpolicy_sync/engine.py b/allenact/algorithms/onpolicy_sync/engine.py
index cf2783f51..fdd8702f4 100644
--- a/allenact/algorithms/onpolicy_sync/engine.py
+++ b/allenact/algorithms/onpolicy_sync/engine.py
@@ -1902,7 +1902,7 @@ def train(
         try:
             if checkpoint_file_name is not None:
                 if "wandb://" == checkpoint_file_name[:8]:
-                    ckpt_dir = "wandb_ckpts"
+                    ckpt_dir = "/tmp/wandb_ckpts"
                     os.makedirs(ckpt_dir, exist_ok=True)
                     checkpoint_file_name = download_checkpoint_from_wandb(
                         checkpoint_path_dir_or_pattern,
diff --git a/allenact/algorithms/onpolicy_sync/runner.py b/allenact/algorithms/onpolicy_sync/runner.py
index 8a02bba24..368021f66 100644
--- a/allenact/algorithms/onpolicy_sync/runner.py
+++ b/allenact/algorithms/onpolicy_sync/runner.py
@@ -1502,7 +1502,7 @@ def get_checkpoint_files(
         approx_ckpt_step_interval: Optional[int] = None,
     ):
         if "wandb://" == checkpoint_path_dir_or_pattern[:8]:
-            eval_dir = "wandb_ckpts_to_eval/{}".format(self.local_start_time_str)
+            eval_dir = "/tmp/wandb_ckpts_to_eval/{}".format(self.local_start_time_str)
             os.makedirs(eval_dir, exist_ok=True)
             return download_checkpoint_from_wandb(checkpoint_path_dir_or_pattern, eval_dir, only_allow_one_ckpt=False)
 
diff --git a/allenact/utils/experiment_utils.py b/allenact/utils/experiment_utils.py
index 995257432..9a1dfcbad 100644
--- a/allenact/utils/experiment_utils.py
+++ b/allenact/utils/experiment_utils.py
@@ -1201,18 +1201,16 @@ def download_checkpoint_from_wandb(checkpoint_path_dir_or_pattern, all_ckpt_dir,
         for steps in ckpt_steps:
             ckpt_fn = "{}-step-{}:latest".format(run_token, steps)
             artifact = api.artifact(ckpt_fn)
-            _ = artifact.download("tmp")
+            _ = artifact.download("/tmp")
             ckpt_dir = "{}/ckpt-{}.pt".format(all_ckpt_dir, steps)
-            shutil.move("tmp/ckpt.pt", ckpt_dir)
+            shutil.move("/tmp/ckpt.pt", ckpt_dir)
             ckpts_paths.append(ckpt_dir)
-        shutil.rmtree("tmp")
         return ckpts_paths
     else:
         assert len(ckpt_steps) == 1
         ckpt_fn = "{}-step-{}:latest".format(run_token, steps)
         artifact = api.artifact(ckpt_fn)
-        _ = artifact.download("tmp")
+        _ = artifact.download("/tmp")
         ckpt_dir = "{}/ckpt-{}.pt".format(all_ckpt_dir, steps)
-        shutil.move("tmp/ckpt.pt", ckpt_dir)
-        shutil.rmtree("tmp")
+        shutil.move("/tmp/ckpt.pt", ckpt_dir)
         return ckpt_dir
\ No newline at end of file

From 1a43d36d842ffca62733d7a717c8f4c1ade67870 Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@allenai.org>
Date: Thu, 11 Jul 2024 18:13:06 -0700
Subject: [PATCH 15/26] fix bug

---
 allenact/algorithms/onpolicy_sync/engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/allenact/algorithms/onpolicy_sync/engine.py b/allenact/algorithms/onpolicy_sync/engine.py
index fdd8702f4..247e2382c 100644
--- a/allenact/algorithms/onpolicy_sync/engine.py
+++ b/allenact/algorithms/onpolicy_sync/engine.py
@@ -1905,7 +1905,7 @@ def train(
                     ckpt_dir = "/tmp/wandb_ckpts"
                     os.makedirs(ckpt_dir, exist_ok=True)
                     checkpoint_file_name = download_checkpoint_from_wandb(
-                        checkpoint_path_dir_or_pattern,
+                        checkpoint_file_name,
                         ckpt_dir,
                         only_allow_one_ckpt=True
                     )

From 9aedd25119032fcb9999d3489606db6b195f42dd Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@allenai.org>
Date: Thu, 11 Jul 2024 19:09:40 -0700
Subject: [PATCH 16/26] fix bug

---
 allenact/utils/experiment_utils.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/allenact/utils/experiment_utils.py b/allenact/utils/experiment_utils.py
index 9a1dfcbad..a20ef6d4a 100644
--- a/allenact/utils/experiment_utils.py
+++ b/allenact/utils/experiment_utils.py
@@ -1208,9 +1208,10 @@ def download_checkpoint_from_wandb(checkpoint_path_dir_or_pattern, all_ckpt_dir,
         return ckpts_paths
     else:
         assert len(ckpt_steps) == 1
-        ckpt_fn = "{}-step-{}:latest".format(run_token, steps)
+        step = ckpt_steps[0]
+        ckpt_fn = "{}-step-{}:latest".format(run_token, step)
         artifact = api.artifact(ckpt_fn)
         _ = artifact.download("/tmp")
-        ckpt_dir = "{}/ckpt-{}.pt".format(all_ckpt_dir, steps)
+        ckpt_dir = "{}/ckpt-{}.pt".format(all_ckpt_dir, step)
         shutil.move("/tmp/ckpt.pt", ckpt_dir)
         return ckpt_dir
\ No newline at end of file

From 25cbe1153887cd42fa34e63ebe2f44bd8ac7579e Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@allenai.org>
Date: Fri, 12 Jul 2024 10:35:50 -0700
Subject: [PATCH 17/26] move download_checkpoint_from_wandb from engine to
 runner

---
 allenact/algorithms/onpolicy_sync/engine.py | 8 --------
 allenact/algorithms/onpolicy_sync/runner.py | 9 +++++++++
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/allenact/algorithms/onpolicy_sync/engine.py b/allenact/algorithms/onpolicy_sync/engine.py
index 247e2382c..fa4390de4 100644
--- a/allenact/algorithms/onpolicy_sync/engine.py
+++ b/allenact/algorithms/onpolicy_sync/engine.py
@@ -1901,14 +1901,6 @@ def train(
         # noinspection PyBroadException
         try:
             if checkpoint_file_name is not None:
-                if "wandb://" == checkpoint_file_name[:8]:
-                    ckpt_dir = "/tmp/wandb_ckpts"
-                    os.makedirs(ckpt_dir, exist_ok=True)
-                    checkpoint_file_name = download_checkpoint_from_wandb(
-                        checkpoint_file_name,
-                        ckpt_dir,
-                        only_allow_one_ckpt=True
-                    )
                 self.checkpoint_load(checkpoint_file_name, restart_pipeline)
 
             self.run_pipeline(valid_on_initial_weights=valid_on_initial_weights)
diff --git a/allenact/algorithms/onpolicy_sync/runner.py b/allenact/algorithms/onpolicy_sync/runner.py
index 368021f66..df3189b8c 100644
--- a/allenact/algorithms/onpolicy_sync/runner.py
+++ b/allenact/algorithms/onpolicy_sync/runner.py
@@ -534,6 +534,15 @@ def start_train(
 
         worker_ids = self.local_worker_ids(TRAIN_MODE_STR)
 
+        if "wandb://" == checkpoint[:8]:
+            ckpt_dir = "/tmp/wandb_ckpts"
+            os.makedirs(ckpt_dir, exist_ok=True)
+            checkpoint = download_checkpoint_from_wandb(
+                checkpoint,
+                ckpt_dir,
+                only_allow_one_ckpt=True
+            )
+
         model_hash = None
         for trainer_id in worker_ids:
             training_kwargs = dict(

From ecc24a900a95a469b4cc80fbbd5591052440766f Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@allenai.org>
Date: Fri, 12 Jul 2024 15:17:05 -0700
Subject: [PATCH 18/26] move download_checkpoint_from_wandb from engine to
 runner

---
 allenact/algorithms/onpolicy_sync/runner.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/allenact/algorithms/onpolicy_sync/runner.py b/allenact/algorithms/onpolicy_sync/runner.py
index df3189b8c..f5f3bd641 100644
--- a/allenact/algorithms/onpolicy_sync/runner.py
+++ b/allenact/algorithms/onpolicy_sync/runner.py
@@ -534,14 +534,15 @@ def start_train(
 
         worker_ids = self.local_worker_ids(TRAIN_MODE_STR)
 
-        if "wandb://" == checkpoint[:8]:
-            ckpt_dir = "/tmp/wandb_ckpts"
-            os.makedirs(ckpt_dir, exist_ok=True)
-            checkpoint = download_checkpoint_from_wandb(
-                checkpoint,
-                ckpt_dir,
-                only_allow_one_ckpt=True
-            )
+        if checkpoint is not None:
+            if checkpoint[:8] == "wandb://":
+                ckpt_dir = "/tmp/wandb_ckpts"
+                os.makedirs(ckpt_dir, exist_ok=True)
+                checkpoint = download_checkpoint_from_wandb(
+                    checkpoint,
+                    ckpt_dir,
+                    only_allow_one_ckpt=True
+                )
 
         model_hash = None
         for trainer_id in worker_ids:

From 52dccc88ff8a50124e4e3d65ef49a6202398ca59 Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@allenai.org>
Date: Mon, 15 Jul 2024 10:45:37 -0700
Subject: [PATCH 19/26] make ckpt saving at every host an option

---
 allenact/algorithms/onpolicy_sync/engine.py |  9 ++++-----
 allenact/algorithms/onpolicy_sync/runner.py |  2 ++
 allenact/main.py                            | 11 +++++++++++
 allenact/utils/experiment_utils.py          |  8 ++++----
 4 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/allenact/algorithms/onpolicy_sync/engine.py b/allenact/algorithms/onpolicy_sync/engine.py
index fa4390de4..64b8e1644 100644
--- a/allenact/algorithms/onpolicy_sync/engine.py
+++ b/allenact/algorithms/onpolicy_sync/engine.py
@@ -66,7 +66,6 @@
     TrainingPipeline,
     set_deterministic_cudnn,
     set_seed,
-    download_checkpoint_from_wandb,
 )
 from allenact.utils.system import get_logger
 from allenact.utils.tensor_utils import batch_observations, detach_recursively
@@ -1176,6 +1175,7 @@ def __init__(
         max_sampler_processes_per_worker: Optional[int] = None,
         save_ckpt_after_every_pipeline_stage: bool = True,
         first_local_worker_id: int = 0,
+        save_ckpt_at_every_host: bool = False,
         **kwargs,
     ):
         kwargs["mode"] = TRAIN_MODE_STR
@@ -1267,6 +1267,7 @@ def __init__(
         )
 
         self.first_local_worker_id = first_local_worker_id
+        self.save_ckpt_at_every_host = save_ckpt_at_every_host
 
     def advance_seed(
         self, seed: Optional[int], return_same_seed_per_worker=False
@@ -1539,8 +1540,7 @@ def _save_checkpoint_then_send_checkpoint_for_validation_and_update_last_save_co
     ):
         model_path = None
         self.deterministic_seeds()
-        # if self.worker_id == self.first_local_worker_id:
-        if self.worker_id == 0:
+        if (self.save_ckpt_at_every_host and self.worker_id == self.first_local_worker_id) or self.worker_id == 0:
             model_path = self.checkpoint_save(pipeline_stage_index=pipeline_stage_index)
             if self.checkpoints_queue is not None:
                 self.checkpoints_queue.put(("eval", model_path))
@@ -1581,8 +1581,7 @@ def run_pipeline(self, valid_on_initial_weights: bool = False):
             and should_save_checkpoints
             and self.checkpoints_queue is not None
         ):
-            # if self.worker_id == self.first_local_worker_id:
-            if self.worker_id == 0:
+            if (self.save_ckpt_at_every_host and self.worker_id == self.first_local_worker_id) or self.worker_id == 0:
                 model_path = self.checkpoint_save()
                 if self.checkpoints_queue is not None:
                     self.checkpoints_queue.put(("eval", model_path))
diff --git a/allenact/algorithms/onpolicy_sync/runner.py b/allenact/algorithms/onpolicy_sync/runner.py
index f5f3bd641..021a36e02 100644
--- a/allenact/algorithms/onpolicy_sync/runner.py
+++ b/allenact/algorithms/onpolicy_sync/runner.py
@@ -501,6 +501,7 @@ def start_train(
         collect_valid_results: bool = False,
         valid_on_initial_weights: bool = False,
         try_restart_after_task_error: bool = False,
+        save_ckpt_at_every_host: bool = False,
     ):
         self._initialize_start_train_or_start_test()
 
@@ -574,6 +575,7 @@ def start_train(
                 distributed_preemption_threshold=self.distributed_preemption_threshold,
                 valid_on_initial_weights=valid_on_initial_weights,
                 try_restart_after_task_error=try_restart_after_task_error,
+                save_ckpt_at_every_host=save_ckpt_at_every_host,
             )
             train: BaseProcess = self.mp_ctx.Process(
                 target=self.train_loop,
diff --git a/allenact/main.py b/allenact/main.py
index 138b5c6f1..cfb852507 100755
--- a/allenact/main.py
+++ b/allenact/main.py
@@ -274,6 +274,16 @@ def get_argument_parser():
         " tutorial https://allenact.org/tutorials/distributed-objectnav-tutorial/",
     )
 
+    parser.add_argument(
+        "--save_ckpt_at_every_host",
+        dest="save_ckpt_at_every_host",
+        action="store_true",
+        required=False,
+        help="if you pass the `--save_ckpt_at_every_host` flag, AllenAct will save checkpoints at every host as the"
+             " the training progresses in distributed training mode.",
+    )
+    parser.set_defaults(save_ckpt_at_every_host=False)
+
     parser.add_argument(
         "--callbacks",
         dest="callbacks",
@@ -484,6 +494,7 @@ def main():
             collect_valid_results=args.collect_valid_results,
             valid_on_initial_weights=args.valid_on_initial_weights,
             try_restart_after_task_error=args.enable_crash_recovery,
+            save_ckpt_at_every_host=save_ckpt_at_every_host,
         )
     else:
         OnPolicyRunner(
diff --git a/allenact/utils/experiment_utils.py b/allenact/utils/experiment_utils.py
index a20ef6d4a..0ace2770d 100644
--- a/allenact/utils/experiment_utils.py
+++ b/allenact/utils/experiment_utils.py
@@ -1201,9 +1201,9 @@ def download_checkpoint_from_wandb(checkpoint_path_dir_or_pattern, all_ckpt_dir,
         for steps in ckpt_steps:
             ckpt_fn = "{}-step-{}:latest".format(run_token, steps)
             artifact = api.artifact(ckpt_fn)
-            _ = artifact.download("/tmp")
+            _ = artifact.download(all_ckpt_dir)
             ckpt_dir = "{}/ckpt-{}.pt".format(all_ckpt_dir, steps)
-            shutil.move("/tmp/ckpt.pt", ckpt_dir)
+            shutil.move("{}/ckpt.pt".format(all_ckpt_dir), ckpt_dir)
             ckpts_paths.append(ckpt_dir)
         return ckpts_paths
     else:
@@ -1211,7 +1211,7 @@ def download_checkpoint_from_wandb(checkpoint_path_dir_or_pattern, all_ckpt_dir,
         step = ckpt_steps[0]
         ckpt_fn = "{}-step-{}:latest".format(run_token, step)
         artifact = api.artifact(ckpt_fn)
-        _ = artifact.download("/tmp")
+        _ = artifact.download(all_ckpt_dir)
         ckpt_dir = "{}/ckpt-{}.pt".format(all_ckpt_dir, step)
-        shutil.move("/tmp/ckpt.pt", ckpt_dir)
+        shutil.move("{}/ckpt.pt".format(all_ckpt_dir), ckpt_dir)
         return ckpt_dir
\ No newline at end of file

From f1e97135cbcee83a4a0d0b9ebf27051dc8e48ade Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@allenai.org>
Date: Mon, 15 Jul 2024 11:53:59 -0700
Subject: [PATCH 20/26] add wandb to requirements

---
 requirements.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index f9f08b4fb..168c45b6a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -31,4 +31,5 @@ torchvision>=0.7.0,<0.10.0
 tqdm==4.56.0
 urllib3==1.26.5
 attr
-attrs
\ No newline at end of file
+attrs
+wandb
\ No newline at end of file

From 65c1b1a4dab256dbd58ba2169e18684db49f50a6 Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@allenai.org>
Date: Mon, 15 Jul 2024 12:53:57 -0700
Subject: [PATCH 21/26] modify pytest

---
 .github/workflows/pytest.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 1875d9682..169e47657 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -23,7 +23,7 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        python -m pip install pytest
+        python -m pip install pytest wandb
         python -m pip install --editable="./allenact"
         python -m pip install --editable="./allenact_plugins[all]"
         python -m pip install pip install -r allenact_plugins/babyai_plugin/extra_requirements.txt # Required as babyai is not on PyPI

From cd9084119b77a8212d584afbf0a760b253890551 Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@allenai.org>
Date: Mon, 15 Jul 2024 13:31:13 -0700
Subject: [PATCH 22/26] auto format

---
 allenact/algorithms/onpolicy_sync/engine.py | 18 +++++++++++++-----
 allenact/algorithms/onpolicy_sync/runner.py |  8 ++++----
 allenact/main.py                            |  2 +-
 allenact/utils/experiment_utils.py          | 11 ++++++++---
 conda/environment-dev.yml                   |  2 +-
 dev_requirements.txt                        |  2 +-
 6 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/allenact/algorithms/onpolicy_sync/engine.py b/allenact/algorithms/onpolicy_sync/engine.py
index 64b8e1644..d032d190b 100644
--- a/allenact/algorithms/onpolicy_sync/engine.py
+++ b/allenact/algorithms/onpolicy_sync/engine.py
@@ -1540,7 +1540,10 @@ def _save_checkpoint_then_send_checkpoint_for_validation_and_update_last_save_co
     ):
         model_path = None
         self.deterministic_seeds()
-        if (self.save_ckpt_at_every_host and self.worker_id == self.first_local_worker_id) or self.worker_id == 0:
+        if (
+            self.save_ckpt_at_every_host
+            and self.worker_id == self.first_local_worker_id
+        ) or self.worker_id == 0:
             model_path = self.checkpoint_save(pipeline_stage_index=pipeline_stage_index)
             if self.checkpoints_queue is not None:
                 self.checkpoints_queue.put(("eval", model_path))
@@ -1581,7 +1584,10 @@ def run_pipeline(self, valid_on_initial_weights: bool = False):
             and should_save_checkpoints
             and self.checkpoints_queue is not None
         ):
-            if (self.save_ckpt_at_every_host and self.worker_id == self.first_local_worker_id) or self.worker_id == 0:
+            if (
+                self.save_ckpt_at_every_host
+                and self.worker_id == self.first_local_worker_id
+            ) or self.worker_id == 0:
                 model_path = self.checkpoint_save()
                 if self.checkpoints_queue is not None:
                     self.checkpoints_queue.put(("eval", model_path))
@@ -1851,10 +1857,12 @@ def run_pipeline(self, valid_on_initial_weights: bool = False):
             # a pipeline stage completes is controlled above
             checkpoint_file_name = None
             if should_save_checkpoints and (
-                    self.training_pipeline.total_steps - self.last_save
-                    >= cur_stage_training_settings.save_interval
+                self.training_pipeline.total_steps - self.last_save
+                >= cur_stage_training_settings.save_interval
             ):
-                checkpoint_file_name = self._save_checkpoint_then_send_checkpoint_for_validation_and_update_last_save_counter()
+                checkpoint_file_name = (
+                    self._save_checkpoint_then_send_checkpoint_for_validation_and_update_last_save_counter()
+                )
                 already_saved_checkpoint = True
 
             if (
diff --git a/allenact/algorithms/onpolicy_sync/runner.py b/allenact/algorithms/onpolicy_sync/runner.py
index 021a36e02..84f4f2c5b 100644
--- a/allenact/algorithms/onpolicy_sync/runner.py
+++ b/allenact/algorithms/onpolicy_sync/runner.py
@@ -540,9 +540,7 @@ def start_train(
                 ckpt_dir = "/tmp/wandb_ckpts"
                 os.makedirs(ckpt_dir, exist_ok=True)
                 checkpoint = download_checkpoint_from_wandb(
-                    checkpoint,
-                    ckpt_dir,
-                    only_allow_one_ckpt=True
+                    checkpoint, ckpt_dir, only_allow_one_ckpt=True
                 )
 
         model_hash = None
@@ -1516,7 +1514,9 @@ def get_checkpoint_files(
         if "wandb://" == checkpoint_path_dir_or_pattern[:8]:
             eval_dir = "/tmp/wandb_ckpts_to_eval/{}".format(self.local_start_time_str)
             os.makedirs(eval_dir, exist_ok=True)
-            return download_checkpoint_from_wandb(checkpoint_path_dir_or_pattern, eval_dir, only_allow_one_ckpt=False)
+            return download_checkpoint_from_wandb(
+                checkpoint_path_dir_or_pattern, eval_dir, only_allow_one_ckpt=False
+            )
 
         if os.path.isdir(checkpoint_path_dir_or_pattern):
             # The fragment is a path to a directory, lets use this directory
diff --git a/allenact/main.py b/allenact/main.py
index cfb852507..8fcce3fea 100755
--- a/allenact/main.py
+++ b/allenact/main.py
@@ -280,7 +280,7 @@ def get_argument_parser():
         action="store_true",
         required=False,
         help="if you pass the `--save_ckpt_at_every_host` flag, AllenAct will save checkpoints at every host as the"
-             " the training progresses in distributed training mode.",
+        " the training progresses in distributed training mode.",
     )
     parser.set_defaults(save_ckpt_at_every_host=False)
 
diff --git a/allenact/utils/experiment_utils.py b/allenact/utils/experiment_utils.py
index 0ace2770d..f123e87a6 100644
--- a/allenact/utils/experiment_utils.py
+++ b/allenact/utils/experiment_utils.py
@@ -763,7 +763,10 @@ def add_stage_component(self, stage_component: StageComponent):
         self.stage_component_uuid_to_stream_memory[stage_component.uuid] = Memory()
 
     def __setattr__(self, key: str, value: Any):
-        if key not in ["training_settings", "callback_to_change_engine_attributes"] and self.training_settings.has_key(key):
+        if key not in [
+            "training_settings",
+            "callback_to_change_engine_attributes",
+        ] and self.training_settings.has_key(key):
             raise NotImplementedError(
                 f"Cannot set {key} in {self.__name__}, update the"
                 f" `training_settings` attribute of {self.__name__} instead."
@@ -1190,7 +1193,9 @@ def current_stage_losses(
         }
 
 
-def download_checkpoint_from_wandb(checkpoint_path_dir_or_pattern, all_ckpt_dir, only_allow_one_ckpt=False):
+def download_checkpoint_from_wandb(
+    checkpoint_path_dir_or_pattern, all_ckpt_dir, only_allow_one_ckpt=False
+):
     api = wandb.Api()
     run_token = checkpoint_path_dir_or_pattern.split("//")[1]
     ckpt_steps = checkpoint_path_dir_or_pattern.split("//")[2:]
@@ -1214,4 +1219,4 @@ def download_checkpoint_from_wandb(checkpoint_path_dir_or_pattern, all_ckpt_dir,
         _ = artifact.download(all_ckpt_dir)
         ckpt_dir = "{}/ckpt-{}.pt".format(all_ckpt_dir, step)
         shutil.move("{}/ckpt.pt".format(all_ckpt_dir), ckpt_dir)
-        return ckpt_dir
\ No newline at end of file
+        return ckpt_dir
diff --git a/conda/environment-dev.yml b/conda/environment-dev.yml
index d29cf197b..9e010c03c 100644
--- a/conda/environment-dev.yml
+++ b/conda/environment-dev.yml
@@ -2,7 +2,7 @@ channels:
   - defaults
   - conda-forge
 dependencies:
-  - black=19.10b0
+  - black>=24.2.0
   - docformatter>=1.3.1
   - gitpython
   - markdown>=3.3
diff --git a/dev_requirements.txt b/dev_requirements.txt
index ef100a0ab..f348cc31e 100644
--- a/dev_requirements.txt
+++ b/dev_requirements.txt
@@ -1,4 +1,4 @@
-black==19.10b0
+black==24.2.0
 ruamel.yaml
 gitpython
 markdown==3.3

From 1c8788cb04651610eb77f9250d4f1226ce713bfc Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@allenai.org>
Date: Mon, 15 Jul 2024 20:16:23 -0700
Subject: [PATCH 23/26] update pillow.py

---
 allenact/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/allenact/setup.py b/allenact/setup.py
index a3075cabd..3495ca9ad 100644
--- a/allenact/setup.py
+++ b/allenact/setup.py
@@ -118,7 +118,7 @@ def _do_setup():
             "moviepy>=1.0.3",
             "filelock",
             "numpy>=1.19.1",
-            "Pillow>=8.2.0",
+            "Pillow>=8.2.0,<=10.2.0",
             "matplotlib>=3.3.1",
             "networkx",
             "opencv-python",

From 06ea4431572f9a6cd970c50b4dc5ca968adf1d44 Mon Sep 17 00:00:00 2001
From: Hao <khzeng@allenai.org>
Date: Mon, 15 Jul 2024 20:22:38 -0700
Subject: [PATCH 24/26] Create black.yml

---
 .github/workflows/black.yml | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 .github/workflows/black.yml

diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml
new file mode 100644
index 000000000..9065b5e02
--- /dev/null
+++ b/.github/workflows/black.yml
@@ -0,0 +1,10 @@
+name: Lint
+
+on: [push, pull_request]
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: psf/black@stable

From c33bf0734bfeb91c095b6c7ab4d27084d1a99c08 Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@allenai.org>
Date: Mon, 15 Jul 2024 20:23:52 -0700
Subject: [PATCH 25/26] update pillow version

---
 allenact/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/allenact/setup.py b/allenact/setup.py
index 3495ca9ad..337bbd261 100644
--- a/allenact/setup.py
+++ b/allenact/setup.py
@@ -118,7 +118,7 @@ def _do_setup():
             "moviepy>=1.0.3",
             "filelock",
             "numpy>=1.19.1",
-            "Pillow>=8.2.0,<=10.2.0",
+            "Pillow>=8.2.0,<9.0.0",
             "matplotlib>=3.3.1",
             "networkx",
             "opencv-python",

From e9f2e311dc5bf92b308b6143ec7dd2694120419f Mon Sep 17 00:00:00 2001
From: KuoHaoZeng <khzeng@allenai.org>
Date: Mon, 15 Jul 2024 20:50:15 -0700
Subject: [PATCH 26/26] update torchvision and pil version in pytest.py

---
 allenact/setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/allenact/setup.py b/allenact/setup.py
index 337bbd261..12a23db37 100644
--- a/allenact/setup.py
+++ b/allenact/setup.py
@@ -112,13 +112,13 @@ def _do_setup():
         install_requires=[
             "gym==0.17.*",  # Newer versions of gym are now broken with updates to setuptools
             "torch>=1.6.0,!=1.8.0",
-            "torchvision>=0.7.0",
+            "torchvision>=0.7.0,<=0.16.2",
             "tensorboardx>=2.1",
             "setproctitle",
             "moviepy>=1.0.3",
             "filelock",
             "numpy>=1.19.1",
-            "Pillow>=8.2.0,<9.0.0",
+            "Pillow>=8.2.0,<10.3.0",
             "matplotlib>=3.3.1",
             "networkx",
             "opencv-python",