From 7310811f5cee99a938c0bd33837deecf14ea9689 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Wed, 26 Feb 2025 22:38:58 +0100 Subject: [PATCH] access config_root not _config --- garak/probes/continuation.py | 2 +- garak/probes/dan.py | 2 +- garak/probes/glitch.py | 2 +- garak/probes/latentinjection.py | 22 ++++++++++------------ garak/probes/phrasing.py | 11 ++++++----- garak/probes/promptinject.py | 2 +- 6 files changed, 20 insertions(+), 21 deletions(-) diff --git a/garak/probes/continuation.py b/garak/probes/continuation.py index 830dab9f..b000ade1 100644 --- a/garak/probes/continuation.py +++ b/garak/probes/continuation.py @@ -78,7 +78,7 @@ def __init__(self, config_root=_config): if self.follow_prompt_cap: num_ids_to_delete = max( - 0, len(self.prompts) - _config.run.soft_probe_prompt_cap + 0, len(self.prompts) - config_root.run.soft_probe_prompt_cap ) ids_to_rm = random.sample(range(len(self.prompts)), num_ids_to_delete) # delete in descending order diff --git a/garak/probes/dan.py b/garak/probes/dan.py index e777903e..39bfa619 100644 --- a/garak/probes/dan.py +++ b/garak/probes/dan.py @@ -661,5 +661,5 @@ class DanInTheWild(DanInTheWildFull): def __init__(self, config_root=_config): super().__init__(config_root=config_root) self.prompts = random.sample(self.prompts, len(self.prompts))[ - : _config.run.soft_probe_prompt_cap + : config_root.run.soft_probe_prompt_cap ] diff --git a/garak/probes/glitch.py b/garak/probes/glitch.py index b936e45a..44645536 100644 --- a/garak/probes/glitch.py +++ b/garak/probes/glitch.py @@ -229,4 +229,4 @@ class Glitch(GlitchFull): def __init__(self, config_root=_config): super().__init__(config_root) - self.max_prompts = _config.run.soft_probe_prompt_cap + self.max_prompts = config_root.run.soft_probe_prompt_cap diff --git a/garak/probes/latentinjection.py b/garak/probes/latentinjection.py index 95919030..ffab1039 100644 --- a/garak/probes/latentinjection.py +++ b/garak/probes/latentinjection.py @@ -29,7 +29,7 @@ class LatentInjectionMixin: injection_sep_pairs = [("", "")] # optional attrib get a default follow_prompt_cap = False - def _build_prompts_triggers(self) -> None: + def _build_prompts_triggers(self, cap=None) -> None: self.prompts = [] self.triggers = [] @@ -63,10 +63,8 @@ def _build_prompts_triggers(self) -> None: self.prompts.append(complete_prompt) self.triggers.append(payload_trigger) - if self.follow_prompt_cap: - num_ids_to_delete = max( - 0, len(self.prompts) - _config.run.soft_probe_prompt_cap - ) + if self.follow_prompt_cap and cap is not None: + num_ids_to_delete = max(0, len(self.prompts) - cap) ids_to_rm = random.sample(range(len(self.prompts)), num_ids_to_delete) # delete in descending order ids_to_rm = sorted(ids_to_rm, reverse=True) @@ -114,7 +112,7 @@ class LatentInjectionTranslationEnFrFull(Probe, LatentInjectionMixin): def __init__(self, config_root=_config): super().__init__(config_root) self.payload_triggers += garak.payloads.load(self.payload_name).payloads - self._build_prompts_triggers() + self._build_prompts_triggers(cap=config_root.run.soft_probe_prompt_cap) def _attempt_prestore_hook(self, attempt: Attempt, seq: int) -> Attempt: return LatentInjectionMixin._attempt_prestore_hook(self, attempt, seq) @@ -278,7 +276,7 @@ class LatentInjectionReportFull(Probe, LatentInjectionMixin): def __init__(self, config_root=_config): super().__init__(config_root) self.payload_triggers = garak.payloads.load(self.payload_name).payloads - self._build_prompts_triggers() + self._build_prompts_triggers(cap=config_root.run.soft_probe_prompt_cap) def _attempt_prestore_hook(self, attempt: Attempt, seq: int) -> Attempt: return LatentInjectionMixin._attempt_prestore_hook(self, attempt, seq) @@ -386,7 +384,7 @@ class LatentInjectionResumeFull(Probe, LatentInjectionMixin): def __init__(self, config_root=_config): super().__init__(config_root) - self._build_prompts_triggers() + self._build_prompts_triggers(cap=config_root.run.soft_probe_prompt_cap) def _attempt_prestore_hook(self, attempt: Attempt, seq: int) -> Attempt: return LatentInjectionMixin._attempt_prestore_hook(self, attempt, seq) @@ -449,7 +447,7 @@ def __init__(self, config_root=_config): + "\n".join(self.paragraphs[i:]) ) - self._build_prompts_triggers() + self._build_prompts_triggers(cap=config_root.run.soft_probe_prompt_cap) def _attempt_prestore_hook(self, attempt: Attempt, seq: int) -> Attempt: return LatentInjectionMixin._attempt_prestore_hook(self, attempt, seq) @@ -547,7 +545,7 @@ class LatentJailbreakFull(LatentInjectionMixin, Probe): def __init__(self, config_root=_config): super().__init__(config_root) - self._build_prompts_triggers() + self._build_prompts_triggers(cap=config_root.run.soft_probe_prompt_cap) def _attempt_prestore_hook(self, attempt: Attempt, seq: int) -> Attempt: return LatentInjectionMixin._attempt_prestore_hook(self, attempt, seq) @@ -602,7 +600,7 @@ class LatentWhois(Probe, LatentInjectionMixin): def __init__(self, config_root=_config): super().__init__(config_root) - self._build_prompts_triggers() + self._build_prompts_triggers(cap=config_root.run.soft_probe_prompt_cap) def _attempt_prestore_hook(self, attempt: Attempt, seq: int) -> Attempt: return LatentInjectionMixin._attempt_prestore_hook(self, attempt, seq) @@ -659,7 +657,7 @@ def __init__(self, config_root=_config): context = context.replace("{INJECTION_MARKER}", self.non_injection_text) self.contexts.append(context) - self._build_prompts_triggers() + self._build_prompts_triggers(cap=config_root.run.soft_probe_prompt_cap) def _attempt_prestore_hook(self, attempt: Attempt, seq: int) -> Attempt: return LatentInjectionMixin._attempt_prestore_hook(self, attempt, seq) diff --git a/garak/probes/phrasing.py b/garak/probes/phrasing.py index aa328f66..6ec52a5f 100644 --- a/garak/probes/phrasing.py +++ b/garak/probes/phrasing.py @@ -14,9 +14,10 @@ class TenseMini: - def _minify_prompts(self): - random.shuffle(self.prompts) - self.prompts = self.prompts[: _config.run.soft_probe_prompt_cap] + def _minify_prompts(self, cap=None): + if cap is not None: + random.shuffle(self.prompts) + self.prompts = self.prompts[:cap] class PastTenseFull(Probe): @@ -61,7 +62,7 @@ class PastTense(PastTenseFull, TenseMini): def __init__(self, config_root=_config): super().__init__(config_root=config_root) - self._minify_prompts() + self._minify_prompts(cap=config_root.run.soft_probe_prompt_cap) class FutureTenseFull(Probe): @@ -107,4 +108,4 @@ class FutureTense(FutureTenseFull, TenseMini): def __init__(self, config_root=_config): super().__init__(config_root=config_root) - self._minify_prompts() + self._minify_prompts(cap=config_root.run.soft_probe_prompt_cap) diff --git a/garak/probes/promptinject.py b/garak/probes/promptinject.py index 4363efed..884cdb5a 100644 --- a/garak/probes/promptinject.py +++ b/garak/probes/promptinject.py @@ -29,7 +29,7 @@ def constructor(self, config_root=_config): prompt_data, ) - self.max_prompts = _config.run.soft_probe_prompt_cap + self.max_prompts = config_root.run.soft_probe_prompt_cap if self.__class__.__name__.endswith("Full"): self.max_prompts = None