Fix-kb-interrupt (#163)

* Fix a bug where a dict was expected (`AgentInfo.stats` now defaults to an empty dict) * Fix the keyboard-interrupt bug
ServiceNow · Sep 27, 2024 · df8f6c0 · df8f6c0
1 parent 8da8449
commit df8f6c0
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 7 deletions.
diff --git a/browsergym/experiments/src/browsergym/experiments/agent.py b/browsergym/experiments/src/browsergym/experiments/agent.py
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import Any
 
 from browsergym.core.action.base import AbstractActionSet
@@ -27,7 +27,7 @@ def default_obs_preprocessor(obs: dict) -> dict:
 class AgentInfo:
     think: str = None
     chat_messages: list = None
-    stats: dict = None
+    stats: dict = field(default_factory=dict)
     markup_page: str = ""
     html_page: str = ""
     extra_info: dict = None

diff --git a/browsergym/experiments/src/browsergym/experiments/loop.py b/browsergym/experiments/src/browsergym/experiments/loop.py
@@ -218,6 +218,7 @@ def run(self):
                 logger.debug(f"Agent chose action:\n {action}")
 
                 if action is None:
+                    # will end the episode after saving the step info.
                     step_info.truncated = True
 
                 step_info.save_step_info(self.exp_dir)
@@ -226,13 +227,13 @@ def run(self):
                 _send_chat_info(env.unwrapped.chat, action, step_info.agent_info)
                 logger.debug(f"Chat info sent.")
 
-                step_info = StepInfo(step=step_info.step + 1)
-                episode_info.append(step_info)
-
                 if action is None:
                     logger.debug(f"Agent returned None action. Ending episode.")
                     break
 
+                step_info = StepInfo(step=step_info.step + 1)
+                episode_info.append(step_info)
+
                 logger.debug(f"Sending action to environment.")
                 step_info.from_step(env, action, obs_preprocessor=agent.obs_preprocessor)
                 logger.debug(f"Environment stepped.")
@@ -448,8 +449,6 @@ def _aggregate_episode_stats(episode_info: list[StepInfo]):
     These two summaries should cover many use cases. If more are needed, the
     user can compute other stats by reloading individual StepInfo.
     """
-    # discard the last step since it was not seen by the agent
-    episode_info = episode_info[:-1]
 
     stats = defaultdict(list)
     for step_info in episode_info: